From 5e5bdda4915c8264040a492018799a1292fe80d9 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Tue, 25 Nov 2014 14:37:18 -0800 Subject: [PATCH 001/531] Initial commit --- .gitignore | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..b8bd0267b --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app From d37f38c455b7281e99a29b880c41bfe30927baab Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 25 Nov 2014 16:15:56 -0800 Subject: [PATCH 002/531] initial version of allreduce --- .gitignore | 3 + README.md | 6 + src/allreduce.h | 107 ++++++++++ src/engine.h | 80 +++++++ src/engine_tcp.cpp | 448 ++++++++++++++++++++++++++++++++++++++++ src/io.h | 214 +++++++++++++++++++ src/socket.h | 387 ++++++++++++++++++++++++++++++++++ src/tcp_master.py | 106 ++++++++++ src/utils.h | 176 ++++++++++++++++ submit_job_tcp.py | 36 ++++ test/Makefile | 33 +++ test/test.sh | 7 + test/test_allreduce.cpp | 80 +++++++ 13 files changed, 1683 insertions(+) create mode 100644 README.md create mode 100644 src/allreduce.h create mode 100644 src/engine.h create mode 100644 src/engine_tcp.cpp create mode 100644 src/io.h create mode 100644 src/socket.h create mode 100644 src/tcp_master.py create mode 100644 src/utils.h create mode 100755 submit_job_tcp.py create mode 100644 test/Makefile create mode 100755 test/test.sh create mode 100644 test/test_allreduce.cpp diff --git a/.gitignore b/.gitignore index b8bd0267b..2922a01e6 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,6 @@ *.exe *.out *.app +*~ +*.pyc +test \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 000000000..d4fa97339 
--- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +AllReduce Abstraction +==== +* Tianqi, Nacho, Tianyi + +Go! + diff --git a/src/allreduce.h b/src/allreduce.h new file mode 100644 index 000000000..c9bd0e579 --- /dev/null +++ b/src/allreduce.h @@ -0,0 +1,107 @@ +/*! + * \file allreduce.h + * \brief This file defines a template wrapper of engine to ensure + * \author Tianqi Chen, Nacho, Tianyi + */ +#include "./engine.h" + +/*! \brief namespace of all reduce */ +namespace sync { +/*! \brief namespace of operator */ +namespace op { +struct Max { + template + inline static void Reduce(DType &dst, const DType &src) { + if (dst < src) dst = src; + } +}; +struct Sum { + template + inline static void Reduce(DType &dst, const DType &src) { + dst += src; + } +}; +struct BitOR { + template + inline static void Reduce(DType &dst, const DType &src) { + dst |= src; + } +}; +template +inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) { + const DType *src = (const DType*)src_; + DType *dst = (DType*)dst_; + for (int i = 0; i < len; ++i) { + OP::Reduce(dst[i], src[i]); + } +} +} // namespace op + +void Init(int argc, char *argv[]) { + engine::Init(argc, argv); +} +void Finalize(void) { + engine::Finalize(); +} +/*! \brief get rank of current process */ +inline int GetRank(void) { + return engine::GetEngine()->GetRank(); +} +/*! \brief get total number of process */ +int GetWorldSize(void) { + return engine::GetEngine()->GetWorldSize(); +} +/*! \brief get name of processor */ +std::string GetProcessorName(void) { + return engine::GetEngine()->GetHost(); +} +/*! 
+ * \brief broadcast an std::string to all others from root + * \param sendrecv_data the pointer to send or recive buffer, + * receive buffer does not need to be pre-allocated + * and string will be resized to correct length + * \param root the root of process + */ +inline void Bcast(std::string *sendrecv_data, int root) { + engine::IEngine *e = engine::GetEngine(); + unsigned len = static_cast(sendrecv_data->length()); + e->Broadcast(&len, sizeof(len), root); + sendrecv_data->resize(len); + if (len != 0) { + e->Broadcast(&(*sendrecv_data)[0], len, root); + } +} +/*! + * \brief perform in-place allreduce, on sendrecvbuf + * this function is NOT thread-safe + * Example Usage: the following code gives sum of the result + * vector data(10); + * ... + * AllReduce(&data[0], data.size()); + * ... + * \param sendrecvbuf buffer for both sending and recving data + * \param count number of elements to be reduced + * \tparam OP see namespace op, reduce operator + * \tparam DType type of data + */ +template +inline void AllReduce(DType *sendrecvbuf, size_t count) { + engine::GetEngine()->AllReduce(sendrecvbuf, sizeof(DType), count, op::Reducer); +} +/*! + * \brief load latest check point + * \param p_model pointer to the model + * \return true if there was stored checkpoint and load was successful + * false if there was no stored checkpoint, means we are start over gain + */ +inline bool LoadCheckPoint(utils::ISerializable *p_model) { + return engine::GetEngine()->LoadCheckPoint(p_model); +} +/*! + * \brief checkpoint the model, meaning we finished a stage of execution + * \param p_model pointer to the model + */ +inline void CheckPoint(const utils::ISerializable &model) { + engine::GetEngine()->CheckPoint(model); +} +} // namespace allreduce diff --git a/src/engine.h b/src/engine.h new file mode 100644 index 000000000..ca928b22a --- /dev/null +++ b/src/engine.h @@ -0,0 +1,80 @@ +#ifndef ALLREDUCE_ENGINE_H +#define ALLREDUCE_ENGINE_H +/*! 
+ * \file engine.h + * \brief This file defines the interface of allreduce library + * \author Tianqi Chen, Nacho, Tianyi + */ +#include "./io.h" + +namespace MPI { +/*! \brief MPI data type just to be compatible with MPI reduce function*/ +class Datatype; +} + +/*! \brief namespace of allreduce functionality */ +namespace engine { +/*! \brief interface of core AllReduce engine */ +class IEngine { + public: + /*! + * \brief reduce function, the same form of MPI reduce function is used, + * to be compatible with MPI interface + * In all the functions, the memory is ensured to aligned to 64-bit + * which means it is OK to cast src,dst to double* int* etc + * \param src pointer to source space + * \param dst pointer to destination reduction + * \param count total number of elements to be reduced(note this is total number of elements instead of bytes) + * the definition of reduce function should be type aware + * \param dtype the data type object, to be compatible with MPI reduce + */ + typedef void (ReduceFunction) (const void *src, + void *dst, int count, + const MPI::Datatype &dtype); + /*! + * \brief perform in-place allreduce, on sendrecvbuf + * this function is NOT thread-safe + * \param sendrecvbuf_ buffer for both sending and recving data + * \param type_n4bytes the unit number of bytes the type have + * \param count number of elements to be reduced + * \param reducer reduce function + */ + virtual void AllReduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer) = 0; + /*! + * \brief broadcast data from root to all nodes + * \param sendrecvbuf_ buffer for both sending and recving data + * \param size the size of the data to be broadcasted + * \param root the root worker id to broadcast the data + */ + virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) = 0; + /*! 
+ * \brief load latest check point + * \param p_model pointer to the model + * \return true if there was stored checkpoint and load was successful + * false if there was no stored checkpoint, means we are start over gain + */ + virtual bool LoadCheckPoint(utils::ISerializable *p_model) = 0; + /*! + * \brief checkpoint the model, meaning we finished a stage of execution + * \param p_model pointer to the model + */ + virtual void CheckPoint(const utils::ISerializable &model) = 0; + /*! \brief get rank of current node */ + virtual int GetRank(void) const = 0; + /*! \brief get total number of */ + virtual int GetWorldSize(void) const = 0; + /*! \brief get the host name of current node */ + virtual std::string GetHost(void) const = 0; +}; + +/*! \brief intiialize the engine module */ +void Init(int argc, char *argv[]); +/*! \brief finalize engine module */ +void Finalize(void); +/*! \brief singleton method to get engine */ +IEngine *GetEngine(void); +} // namespace engine +#endif // ALLREDUCE_ENGINE_H diff --git a/src/engine_tcp.cpp b/src/engine_tcp.cpp new file mode 100644 index 000000000..a0506129d --- /dev/null +++ b/src/engine_tcp.cpp @@ -0,0 +1,448 @@ +/*! + * \file engine_tcp.cpp + * \brief implementation of sync AllReduce using TCP sockets + * with use non-block socket and tree-shape reduction + * \author Tianqi Chen + */ +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE +#define NOMINMAX +#include +#include +#include +#include "./engine.h" +#include "./socket.h" + +namespace MPI { +class Datatype { + public: + size_t type_size; + Datatype(size_t type_size) : type_size(type_size) {} +}; +} +namespace engine { +/*! 
\brief implementation of sync goes to here */ +class SyncManager : public IEngine { + public: + const static int kMagic = 0xff99; + SyncManager(void) { + master_uri = "NULL"; + master_port = 9000; + host_uri = ""; + slave_port = 9010; + nport_trial = 1000; + rank = 0; + world_size = 1; + this->SetParam("reduce_buffer", "256MB"); + } + ~SyncManager(void) { + } + inline void Shutdown(void) { + for (size_t i = 0; i < links.size(); ++i) { + links[i].sock.Close(); + } + links.clear(); + utils::TCPSocket::Finalize(); + } + /*! \brief set parameters to the sync manager */ + inline void SetParam(const char *name, const char *val) { + if (!strcmp(name, "master_uri")) master_uri = val; + if (!strcmp(name, "master_port")) master_port = atoi(val); + if (!strcmp(name, "reduce_buffer")) { + char unit; + unsigned long amount; + if (sscanf(val, "%lu%c", &amount, &unit) == 2) { + switch (unit) { + case 'B': reduce_buffer_size = (amount + 7)/ 8; break; + case 'K': reduce_buffer_size = amount << 7UL; break; + case 'M': reduce_buffer_size = amount << 17UL; break; + case 'G': reduce_buffer_size = amount << 27UL; break; + default: utils::Error("invalid format for reduce buffer"); + } + } else { + utils::Error("invalid format for reduce_buffer, shhould be {integer}{unit}, unit can be {B, KB, MB, GB}"); + } + } + } + // initialize the manager + inline void Init(void) { + utils::TCPSocket::Startup(); + // single node mode + if (master_uri == "NULL") return; + utils::Assert(links.size() == 0, "can only call Init once"); + int magic = kMagic; + int nchild = 0, nparent = 0; + this->host_uri = utils::SockAddr::GetHostName(); + // get information from master + utils::TCPSocket master; + master.Create(); + master.Connect(utils::SockAddr(master_uri.c_str(), master_port)); + utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 1"); + utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 2"); + utils::Check(magic == kMagic, 
"sync::Invalid master message, init failure"); + utils::Assert(master.RecvAll(&rank, sizeof(rank)) == sizeof(rank), "sync::Init failure 3"); + utils::Assert(master.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), "sync::Init failure 4"); + utils::Assert(master.RecvAll(&nparent, sizeof(nparent)) == sizeof(nparent), "sync::Init failure 5"); + utils::Assert(master.RecvAll(&nchild, sizeof(nchild)) == sizeof(nchild), "sync::Init failure 6"); + utils::Assert(nchild >= 0, "in correct number of childs"); + utils::Assert(nparent == 1 || nparent == 0, "in correct number of parent"); + + // create listen + utils::TCPSocket sock_listen; + sock_listen.Create(); + int port = sock_listen.TryBindHost(slave_port, slave_port + nport_trial); + utils::Check(port != -1, "sync::Init fail to bind the ports specified"); + sock_listen.Listen(); + + if (nparent != 0) { + parent_index = 0; + links.push_back(LinkRecord()); + int len, hport; + std::string hname; + utils::Assert(master.RecvAll(&len, sizeof(len)) == sizeof(len), "sync::Init failure 9"); + hname.resize(len); + utils::Assert(len != 0, "string must not be empty"); + utils::Assert(master.RecvAll(&hname[0], len) == static_cast(len), "sync::Init failure 10"); + utils::Assert(master.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "sync::Init failure 11"); + links[0].sock.Create(); + links[0].sock.Connect(utils::SockAddr(hname.c_str(), hport)); + utils::Assert(links[0].sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 12"); + utils::Assert(links[0].sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 13"); + utils::Check(magic == kMagic, "sync::Init failure, parent magic number mismatch"); + parent_index = 0; + } else { + parent_index = -1; + } + // send back socket listening port to master + utils::Assert(master.SendAll(&port, sizeof(port)) == sizeof(port), "sync::Init failure 14"); + // close connection to master + master.Close(); + // accept links from childs + for (int i = 
0; i < nchild; ++i) { + LinkRecord r; + while (true) { + r.sock = sock_listen.Accept(); + if (r.sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic) && magic == kMagic) { + utils::Assert(r.sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 15"); + break; + } else { + // not a valid child + r.sock.Close(); + } + } + links.push_back(r); + } + // close listening sockets + sock_listen.Close(); + // setup selecter + selecter.Clear(); + for (size_t i = 0; i < links.size(); ++i) { + // set the socket to non-blocking mode + links[i].sock.SetNonBlock(true); + selecter.WatchRead(links[i].sock); + selecter.WatchWrite(links[i].sock); + } + // done + } + /*! \brief get rank */ + virtual int GetRank(void) const { + return rank; + } + /*! \brief get rank */ + virtual int GetWorldSize(void) const { + return world_size; + } + /*! \brief get rank */ + virtual std::string GetHost(void) const { + return host_uri; + } + /*! + * \brief perform in-place allreduce, on sendrecvbuf + * this function is NOT thread-safe + * \param sendrecvbuf_ buffer for both sending and recving data + * \param type_n4bytes the unit number of bytes the type have + * \param count number of elements to be reduced + * \param reducer reduce function + */ + virtual void AllReduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer) { + if (links.size() == 0) return; + // total size of message + const size_t total_size = type_nbytes * count; + // number of links + const int nlink = static_cast(links.size()); + // send recv buffer + char *sendrecvbuf = reinterpret_cast(sendrecvbuf_); + // size of space that we already performs reduce in up pass + size_t size_up_reduce = 0; + // size of space that we have already passed to parent + size_t size_up_out = 0; + // size of message we received, and send in the down pass + size_t size_down_in = 0; + + // initialize the link ring-buffer and pointer + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + 
links[i].InitBuffer(type_nbytes, count, reduce_buffer_size); + } + links[i].ResetSize(); + } + // if no childs, no need to reduce + if (nlink == static_cast(parent_index != -1)) { + size_up_reduce = total_size; + } + + // while we have not passed the messages out + while(true) { + selecter.Select(); + // read data from childs + for (int i = 0; i < nlink; ++i) { + if (i != parent_index && selecter.CheckRead(links[i].sock)) { + links[i].ReadToRingBuffer(size_up_out); + } + } + // this node have childs, peform reduce + if (nlink > static_cast(parent_index != -1)) { + size_t buffer_size = 0; + // do upstream reduce + size_t max_reduce = total_size; + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + max_reduce= std::min(max_reduce, links[i].size_read); + utils::Assert(buffer_size == 0 || buffer_size == links[i].buffer_size, + "buffer size inconsistent"); + buffer_size = links[i].buffer_size; + } + } + utils::Assert(buffer_size != 0, "must assign buffer_size"); + // round to type_n4bytes + max_reduce = (max_reduce / type_nbytes * type_nbytes); + // peform reduce, can be at most two rounds + while (size_up_reduce < max_reduce) { + // start position + size_t start = size_up_reduce % buffer_size; + // peform read till end of buffer + size_t nread = std::min(buffer_size - start, max_reduce - size_up_reduce); + utils::Assert(nread % type_nbytes == 0, "AllReduce: size check"); + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + reducer(links[i].buffer_head + start, + sendrecvbuf + size_up_reduce, + static_cast(nread / type_nbytes), + MPI::Datatype(type_nbytes)); + } + } + size_up_reduce += nread; + } + } + if (parent_index != -1) { + // pass message up to parent, can pass data that are already been reduced + if (selecter.CheckWrite(links[parent_index].sock)) { + size_up_out += links[parent_index].sock. 
+ Send(sendrecvbuf + size_up_out, size_up_reduce - size_up_out); + } + // read data from parent + if (selecter.CheckRead(links[parent_index].sock)) { + size_down_in += links[parent_index].sock. + Recv(sendrecvbuf + size_down_in, total_size - size_down_in); + utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error"); + } + } else { + // this is root, can use reduce as most recent point + size_down_in = size_up_out = size_up_reduce; + } + // check if we finished the job of message passing + size_t nfinished = size_down_in; + // can pass message down to childs + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + if (selecter.CheckWrite(links[i].sock)) { + links[i].WriteFromArray(sendrecvbuf, size_down_in); + } + nfinished = std::min(links[i].size_write, nfinished); + } + } + // check boundary condition + if (nfinished >= total_size) break; + } + } + /*! + * \brief broadcast data from root to all nodes + * \param sendrecvbuf_ buffer for both sending and recving data + * \param size the size of the data to be broadcasted + * \param root the root worker id to broadcast the data + */ + virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { + if (links.size() == 0) return; + // number of links + const int nlink = static_cast(links.size()); + // size of space already read from data + size_t size_in = 0; + // input link, -2 means unknown yet, -1 means this is root + int in_link = -2; + + // initialize the link statistics + for (int i = 0; i < nlink; ++i) { + links[i].ResetSize(); + } + // root have all the data + if (this->rank == root) { + size_in = total_size; + in_link = -1; + } + + // while we have not passed the messages out + while(true) { + selecter.Select(); + if (in_link == -2) { + // probe in-link + for (int i = 0; i < nlink; ++i) { + if (selecter.CheckRead(links[i].sock)) { + links[i].ReadToArray(sendrecvbuf_, total_size); + size_in = links[i].size_read; + if (size_in != 0) { + in_link = i; break; + } + } + } + } else { 
+ // read from in link + if (in_link >= 0 && selecter.CheckRead(links[in_link].sock)) { + links[in_link].ReadToArray(sendrecvbuf_, total_size); + size_in = links[in_link].size_read; + } + } + size_t nfinished = total_size; + // send data to all out-link + for (int i = 0; i < nlink; ++i) { + if (i != in_link) { + if (selecter.CheckWrite(links[i].sock)) { + links[i].WriteFromArray(sendrecvbuf_, size_in); + } + nfinished = std::min(nfinished, links[i].size_write); + } + } + // check boundary condition + if (nfinished >= total_size) break; + } + } + virtual bool LoadCheckPoint(utils::ISerializable *p_model) { + return false; + } + virtual void CheckPoint(const utils::ISerializable &model) { + } + + private: + // an independent child record + struct LinkRecord { + public: + // socket to get data from/to link + utils::TCPSocket sock; + // size of data readed from link + size_t size_read; + // size of data sent to the link + size_t size_write; + // pointer to buffer head + char *buffer_head; + // buffer size, in bytes + size_t buffer_size; + // initialize buffer + inline void InitBuffer(size_t type_nbytes, size_t count, size_t reduce_buffer_size) { + size_t n = (type_nbytes * count + 7)/ 8; + buffer_.resize(std::min(reduce_buffer_size, n)); + // make sure align to type_nbytes + buffer_size = buffer_.size() * sizeof(uint64_t) / type_nbytes * type_nbytes; + utils::Assert(type_nbytes <= buffer_size, "too large type_nbytes=%lu, buffer_size=%lu", type_nbytes, buffer_size); + // set buffer head + buffer_head = reinterpret_cast(BeginPtr(buffer_)); + } + // reset the recv and sent size + inline void ResetSize(void) { + size_write = size_read = 0; + } + /*! 
+ * \brief read data into ring-buffer, with care not to existing useful override data + * position after protect_start + * \param protect_start all data start from protect_start is still needed in buffer + * read shall not override this + */ + inline void ReadToRingBuffer(size_t protect_start) { + size_t ngap = size_read - protect_start; + utils::Assert(ngap <= buffer_size, "AllReduce: boundary check"); + size_t offset = size_read % buffer_size; + size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); + size_read += sock.Recv(buffer_head + offset, nmax); + } + /*! + * \brief read data into array, + * this function can not be used together with ReadToRingBuffer + * a link can either read into the ring buffer, or existing array + * \param max_size maximum size of array + */ + inline void ReadToArray(void *recvbuf_, size_t max_size) { + char *p = static_cast(recvbuf_); + size_read += sock.Recv(p + size_read, max_size - size_read); + } + /*! + * \brief write data in array to sock + * \param sendbuf_ head of array + * \param max_size maximum size of array + */ + inline void WriteFromArray(const void *sendbuf_, size_t max_size) { + const char *p = static_cast(sendbuf_); + size_write += sock.Send(p + size_write, max_size - size_write); + } + + private: + // recv buffer to get data from child + // aligned with 64 bits, will be able to perform 64 bits operations freely + std::vector buffer_; + }; + //------------------ + // uri of current host, to be set by Init + std::string host_uri; + // uri of master + std::string master_uri; + // port of master address + int master_port; + // port of slave process + int slave_port, nport_trial; + // reduce buffer size + size_t reduce_buffer_size; + // current rank + int rank; + // world size + int world_size; + // index of parent link, can be -1, meaning this is root of the tree + int parent_index; + // sockets of all links + std::vector links; + // select helper + utils::SelectHelper selecter; +}; + +// singleton sync 
manager +SyncManager manager; + +/*! \brief intiialize the synchronization module */ +void Init(int argc, char *argv[]) { + for (int i = 1; i < argc; ++i) { + char name[256], val[256]; + if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { + manager.SetParam(name, val); + } + } + manager.Init(); +} +/*! \brief finalize syncrhonization module */ +void Finalize(void) { + manager.Shutdown(); +} +/*! \brief singleton method to get engine */ +IEngine *GetEngine(void) { + return &manager; +} + +} // namespace engine diff --git a/src/io.h b/src/io.h new file mode 100644 index 000000000..97a33f163 --- /dev/null +++ b/src/io.h @@ -0,0 +1,214 @@ +#ifndef ALLREDUCE_UTILS_IO_H +#define ALLREDUCE_UTILS_IO_H +#include +#include +#include +#include +#include "./utils.h" +/*! + * \file io.h + * \brief general stream interface for serialization, I/O + * \author Tianqi Chen + */ +namespace utils { +/*! + * \brief interface of stream I/O, used to serialize model + */ +class IStream { + public: + /*! + * \brief read data from stream + * \param ptr pointer to memory buffer + * \param size size of block + * \return usually is the size of data readed + */ + virtual size_t Read(void *ptr, size_t size) = 0; + /*! + * \brief write data to stream + * \param ptr pointer to memory buffer + * \param size size of block + */ + virtual void Write(const void *ptr, size_t size) = 0; + /*! \brief virtual destructor */ + virtual ~IStream(void) {} + + public: + // helper functions to write various of data structures + /*! + * \brief binary serialize a vector + * \param vec vector to be serialized + */ + template + inline void Write(const std::vector &vec) { + uint64_t sz = static_cast(vec.size()); + this->Write(&sz, sizeof(sz)); + if (sz != 0) { + this->Write(&vec[0], sizeof(T) * sz); + } + } + /*! 
+ * \brief binary load a vector + * \param out_vec vector to be loaded + * \return whether load is successfull + */ + template + inline bool Read(std::vector *out_vec) { + uint64_t sz; + if (this->Read(&sz, sizeof(sz)) == 0) return false; + out_vec->resize(sz); + if (sz != 0) { + if (this->Read(&(*out_vec)[0], sizeof(T) * sz) == 0) return false; + } + return true; + } + /*! + * \brief binary serialize a string + * \param str the string to be serialized + */ + inline void Write(const std::string &str) { + uint64_t sz = static_cast(str.length()); + this->Write(&sz, sizeof(sz)); + if (sz != 0) { + this->Write(&str[0], sizeof(char) * sz); + } + } + /*! + * \brief binary load a string + * \param out_str string to be loaded + * \return whether load is successful + */ + inline bool Read(std::string *out_str) { + uint64_t sz; + if (this->Read(&sz, sizeof(sz)) == 0) return false; + out_str->resize(sz); + if (sz != 0) { + if (this->Read(&(*out_str)[0], sizeof(char) * sz) == 0) return false; + } + return true; + } +}; + +/*! \brief interface of se*/ +class ISerializable { + /*! \brief load the model from file */ + virtual void Load(IStream &fi) = 0; + /*! \brief save the model to the stream*/ + virtual void Save(IStream &fo) const = 0; +}; + +/*! \brief interface of i/o stream that support seek */ +class ISeekStream: public IStream { + public: + /*! \brief seek to certain position of the file */ + virtual void Seek(size_t pos) = 0; + /*! \brief tell the position of the stream */ + virtual size_t Tell(void) = 0; +}; + +/*! 
\brief fixed size memory buffer */ +struct MemoryFixSizeBuffer : public ISeekStream { + public: + MemoryFixSizeBuffer(void *p_buffer, size_t buffer_size) + : p_buffer_(reinterpret_cast(p_buffer)), buffer_size_(buffer_size) { + curr_ptr_ = 0; + } + virtual ~MemoryFixSizeBuffer(void) {} + virtual size_t Read(void *ptr, size_t size) { + utils::Assert(curr_ptr_ + size <= buffer_size_, + "read can not have position excceed buffer length"); + size_t nread = std::min(buffer_size_ - curr_ptr_, size); + if (nread != 0) memcpy(ptr, p_buffer_ + curr_ptr_, nread); + curr_ptr_ += nread; + return nread; + } + virtual void Write(const void *ptr, size_t size) { + if (size == 0) return; + utils::Assert(curr_ptr_ + size <= buffer_size_, + "write position exceed fixed buffer size"); + memcpy(p_buffer_ + curr_ptr_, ptr, size); + curr_ptr_ += size; + } + virtual void Seek(size_t pos) { + curr_ptr_ = static_cast(pos); + } + virtual size_t Tell(void) { + return curr_ptr_; + } + + private: + /*! \brief in memory buffer */ + char *p_buffer_; + /*! \brief current pointer */ + size_t buffer_size_; + /*! \brief current pointer */ + size_t curr_ptr_; +}; // class MemoryFixSizeBuffer + +/*! 
\brief a in memory buffer that can be read and write as stream interface */ +struct MemoryBufferStream : public ISeekStream { + public: + MemoryBufferStream(std::string *p_buffer) + : p_buffer_(p_buffer) { + curr_ptr_ = 0; + } + virtual ~MemoryBufferStream(void) {} + virtual size_t Read(void *ptr, size_t size) { + utils::Assert(curr_ptr_ <= p_buffer_->length(), + "read can not have position excceed buffer length"); + size_t nread = std::min(p_buffer_->length() - curr_ptr_, size); + if (nread != 0) memcpy(ptr, &(*p_buffer_)[0] + curr_ptr_, nread); + curr_ptr_ += nread; + return nread; + } + virtual void Write(const void *ptr, size_t size) { + if (size == 0) return; + if (curr_ptr_ + size > p_buffer_->length()) { + p_buffer_->resize(curr_ptr_+size); + } + memcpy(&(*p_buffer_)[0] + curr_ptr_, ptr, size); + curr_ptr_ += size; + } + virtual void Seek(size_t pos) { + curr_ptr_ = static_cast(pos); + } + virtual size_t Tell(void) { + return curr_ptr_; + } + + private: + /*! \brief in memory buffer */ + std::string *p_buffer_; + /*! \brief current pointer */ + size_t curr_ptr_; +}; // class MemoryBufferStream + +/*! \brief implementation of file i/o stream */ +class FileStream : public ISeekStream { + public: + explicit FileStream(FILE *fp) : fp(fp) {} + explicit FileStream(void) { + this->fp = NULL; + } + virtual size_t Read(void *ptr, size_t size) { + return std::fread(ptr, size, 1, fp); + } + virtual void Write(const void *ptr, size_t size) { + std::fwrite(ptr, size, 1, fp); + } + virtual void Seek(size_t pos) { + std::fseek(fp, static_cast(pos), SEEK_SET); + } + virtual size_t Tell(void) { + return std::ftell(fp); + } + inline void Close(void) { + if (fp != NULL){ + std::fclose(fp); fp = NULL; + } + } + + private: + FILE *fp; +}; +} // namespace utils +#endif diff --git a/src/socket.h b/src/socket.h new file mode 100644 index 000000000..a18d9a576 --- /dev/null +++ b/src/socket.h @@ -0,0 +1,387 @@ +#ifndef ALLREDUCE_SOCKET_H +#define ALLREDUCE_SOCKET_H +/*! 
+ * \file socket.h + * \brief this file aims to provide a wrapper of sockets + * \author Tianqi Chen + */ +#if defined(_WIN32) +#include +#include +#else +#include +#include +#include +#include +#include +#include +#include +#include +#endif +#include +#include +#include "./utils.h" + +namespace utils { +#if defined(_WIN32) +typedef int ssize_t; +typedef int sock_size_t; +#else +typedef int SOCKET; +typedef size_t sock_size_t; +const int INVALID_SOCKET = -1; +#endif + +/*! \brief data structure for network address */ +struct SockAddr { + sockaddr_in addr; + // constructor + SockAddr(void) {} + SockAddr(const char *url, int port) { + this->Set(url, port); + } + inline static std::string GetHostName(void) { + std::string buf; buf.resize(256); + utils::Check(gethostname(&buf[0], 256) != -1, "fail to get host name"); + return std::string(buf.c_str()); + } + /*! + * \brief set the address + * \param url the url of the address + * \param port the port of address + */ + inline void Set(const char *host, int port) { + hostent *hp = gethostbyname(host); + Check(hp != NULL, "cannot obtain address of %s", host); + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + memcpy(&addr.sin_addr, hp->h_addr_list[0], hp->h_length); + } + /*! \brief return port of the address*/ + inline int port(void) const { + return ntohs(addr.sin_port); + } + /*! \return a string representation of the address */ + inline std::string AddrStr(void) const { + std::string buf; buf.resize(256); +#ifdef _WIN32 + const char *s = inet_ntop(AF_INET, (PVOID)&addr.sin_addr, &buf[0], buf.length()); +#else + const char *s = inet_ntop(AF_INET, &addr.sin_addr, &buf[0], buf.length()); +#endif + Assert(s != NULL, "cannot decode address"); + return std::string(s); + } +}; +/*! + * \brief a wrapper of TCP socket that hopefully be cross platform + */ +class TCPSocket { + public: + /*! 
\brief the file descriptor of socket */ + SOCKET sockfd; + // constructor + TCPSocket(void) : sockfd(INVALID_SOCKET) { + } + explicit TCPSocket(SOCKET sockfd) : sockfd(sockfd) { + } + ~TCPSocket(void) { + // do nothing in destructor + // user need to take care of close + } + // default conversion to int + inline operator SOCKET() const { + return sockfd; + } + /*! + * \brief create the socket, call this before using socket + * \param af domain + */ + inline void Create(int af = PF_INET) { + sockfd = socket(PF_INET, SOCK_STREAM, 0); + if (sockfd == INVALID_SOCKET) { + SockError("Create"); + } + } + /*! + * \brief start up the socket module + * call this before using the sockets + */ + inline static void Startup(void) { +#ifdef _WIN32 + WSADATA wsa_data; + if (WSAStartup(MAKEWORD(2, 2), &wsa_data) != -1) { + SockError("Startup"); + } + if (LOBYTE(wsa_data.wVersion) != 2 || HIBYTE(wsa_data.wVersion) != 2) { + WSACleanup(); + utils::Error("Could not find a usable version of Winsock.dll\n"); + } +#endif + } + /*! + * \brief shutdown the socket module after use, all sockets need to be closed + */ + inline static void Finalize(void) { +#ifdef _WIN32 + WSACleanup(); +#endif + } + /*! + * \brief set this socket to use non-blocking mode + * \param non_block whether set it to be non-block, if it is false + * it will set it back to block mode + */ + inline void SetNonBlock(bool non_block) { +#ifdef _WIN32 + u_long mode = non_block ? 1 : 0; + if (ioctlsocket(sockfd, FIONBIO, &mode) != NO_ERROR) { + SockError("SetNonBlock"); + } +#else + int flag = fcntl(sockfd, F_GETFL, 0); + if (flag == -1) { + SockError("SetNonBlock-1"); + } + if (non_block) { + flag |= O_NONBLOCK; + } else { + flag &= ~O_NONBLOCK; + } + if (fcntl(sockfd, F_SETFL, flag) == -1) { + SockError("SetNonBlock-2"); + } +#endif + } + /*! + * \brief perform listen of the socket + * \param backlog backlog parameter + */ + inline void Listen(int backlog = 16) { + listen(sockfd, backlog); + } + /*! 
\brief get a new connection */ + TCPSocket Accept(void) { + SOCKET newfd = accept(sockfd, NULL, NULL); + if (newfd == INVALID_SOCKET) { + SockError("Accept"); + } + return TCPSocket(newfd); + } + /*! + * \brief bind the socket to an address + * \param addr + */ + inline void Bind(const SockAddr &addr) { + if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == -1) { + SockError("Bind"); + } + } + /*! + * \brief try bind the socket to host, from start_port to end_port + * \param start_port starting port number to try + * \param end_port ending port number to try + * \param out_addr the binding address, if successful + * \return whether the binding is successful + */ + inline int TryBindHost(int start_port, int end_port) { + for (int port = start_port; port < end_port; ++port) { + SockAddr addr("0.0.0.0", port); + if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == 0) { + return port; + } + if (errno != EADDRINUSE) { + SockError("TryBindHost"); + } + } + return -1; + } + /*! + * \brief connect to an address + * \param addr the address to connect to + */ + inline void Connect(const SockAddr &addr) { + if (connect(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == -1) { + SockError("Connect"); + } + } + /*! \brief close the connection */ + inline void Close(void) { + if (sockfd != -1) { +#ifdef _WIN32 + closesocket(sockfd); +#else + close(sockfd); +#endif + sockfd = INVALID_SOCKET; + } else { + Error("TCPSocket::Close double close the socket or close without create"); + } + } + /*! 
+ * \brief send data using the socket + * \param buf the pointer to the buffer + * \param len the size of the buffer + * \param flags extra flags + * \return size of data actually sent + */ + inline size_t Send(const void *buf_, size_t len, int flag = 0) { + const char *buf = reinterpret_cast(buf_); + if (len == 0) return 0; + ssize_t ret = send(sockfd, buf, static_cast(len), flag); + if (ret == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) return 0; + SockError("Send"); + } + return ret; + } + /*! + * \brief receive data using the socket + * \param buf_ the pointer to the buffer + * \param len the size of the buffer + * \param flags extra flags + * \return size of data actually received + */ + inline size_t Recv(void *buf_, size_t len, int flags = 0) { + char *buf = reinterpret_cast(buf_); + if (len == 0) return 0; + ssize_t ret = recv(sockfd, buf, static_cast(len), flags); + if (ret == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) return 0; + SockError("Recv"); + } + return ret; + } + /*! + * \brief peform block write that will attempt to send all data out + * can still return smaller than request when error occurs + * \param buf the pointer to the buffer + * \param len the size of the buffer + * \return size of data actually sent + */ + inline size_t SendAll(const void *buf_, size_t len) { + const char *buf = reinterpret_cast(buf_); + size_t ndone = 0; + while (ndone < len) { + ssize_t ret = send(sockfd, buf, static_cast(len - ndone), 0); + if (ret == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) return ndone; + SockError("Recv"); + } + buf += ret; + ndone += ret; + } + return ndone; + } + /*! 
+ * \brief peforma block read that will attempt to read all data + * can still return smaller than request when error occurs + * \param buf_ the buffer pointer + * \param len length of data to recv + * \return size of data actually sent + */ + inline size_t RecvAll(void *buf_, size_t len) { + char *buf = reinterpret_cast(buf_); + size_t ndone = 0; + while (ndone < len) { + ssize_t ret = recv(sockfd, buf, static_cast(len - ndone), MSG_WAITALL); + if (ret == -1) { + if (errno == EAGAIN || errno == EWOULDBLOCK) return ndone; + SockError("Recv"); + } + if (ret == 0) return ndone; + buf += ret; + ndone += ret; + } + return ndone; + } + + private: + // report an socket error + inline static void SockError(const char *msg) { + int errsv = errno; + Error("Socket %s Error:%s", msg, strerror(errsv)); + } +}; +/*! \brief helper data structure to perform select */ +struct SelectHelper { + public: + SelectHelper(void) { + this->Clear(); + } + /*! + * \brief add file descriptor to watch for read + * \param fd file descriptor to be watched + */ + inline void WatchRead(SOCKET fd) { + read_fds.push_back(fd); + if (fd > maxfd) maxfd = fd; + } + /*! + * \brief add file descriptor to watch for write + * \param fd file descriptor to be watched + */ + inline void WatchWrite(SOCKET fd) { + write_fds.push_back(fd); + if (fd > maxfd) maxfd = fd; + } + /*! + * \brief Check if the descriptor is ready for read + * \param fd file descriptor to check status + */ + inline bool CheckRead(SOCKET fd) const { + return FD_ISSET(fd, &read_set) != 0; + } + /*! + * \brief Check if the descriptor is ready for write + * \param fd file descriptor to check status + */ + inline bool CheckWrite(SOCKET fd) const { + return FD_ISSET(fd, &write_set) != 0; + } + /*! + * \brief clear all the monitored descriptors + */ + inline void Clear(void) { + read_fds.clear(); + write_fds.clear(); + maxfd = 0; + } + /*! 
+ * \brief peform select on the set defined + * \param timeout specify timeout in micro-seconds(ms) if equals 0, means select will always block + * \return number of active descriptors selected + */ + inline int Select(long timeout = 0) { + FD_ZERO(&read_set); + FD_ZERO(&write_set); + for (size_t i = 0; i < read_fds.size(); ++i) { + FD_SET(read_fds[i], &read_set); + } + for (size_t i = 0; i < write_fds.size(); ++i) { + FD_SET(write_fds[i], &write_set); + } + int ret; + if (timeout == 0) { + ret = select(static_cast(maxfd + 1), &read_set, &write_set, NULL, NULL); + } else { + timeval tm; + tm.tv_usec = (timeout % 1000) * 1000; + tm.tv_sec = timeout / 1000; + ret = select(static_cast(maxfd + 1), &read_set, &write_set, NULL, &tm); + } + if (ret == -1) { + int errsv = errno; + Error("Select Error: %s", strerror(errsv)); + } + return ret; + } + + private: + SOCKET maxfd; + fd_set read_set, write_set; + std::vector read_fds, write_fds; +}; +} +#endif diff --git a/src/tcp_master.py b/src/tcp_master.py new file mode 100644 index 000000000..c0820f14b --- /dev/null +++ b/src/tcp_master.py @@ -0,0 +1,106 @@ +""" +Master script for xgboost, tcp_master +This script can be used to start jobs of multi-node xgboost using sync_tcp + +Tianqi Chen +""" + +import sys +import os +import socket +import struct +import subprocess +from threading import Thread + +class ExSocket: + def __init__(self, sock): + self.sock = sock + def recvall(self, nbytes): + res = [] + sock = self.sock + nread = 0 + while nread < nbytes: + chunk = self.sock.recv(min(nbytes - nread, 1024), socket.MSG_WAITALL) + nread += len(chunk) + res.append(chunk) + return ''.join(res) + def recvint(self): + return struct.unpack('@i', self.recvall(4))[0] + def sendint(self, n): + self.sock.sendall(struct.pack('@i', n)) + def sendstr(self, s): + self.sendint(len(s)) + self.sock.sendall(s) + +# magic number used to verify existence of data +kMagic = 0xff99 + +class Master: + def __init__(self, port = 9000, port_end = 9999): + 
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + for port in range(port, port_end): + try: + sock.bind(('', port)) + self.port = port + break + except socket.error: + continue + sock.listen(16) + self.sock = sock + print 'start listen on %s:%d' % (socket.gethostname(), self.port) + def __del__(self): + self.sock.close() + def slave_args(self): + return ['master_uri=%s' % socket.gethostname(), + 'master_port=%s' % self.port] + def accept_slaves(self, nslave): + slave_addrs = [] + for rank in range(nslave): + while True: + fd, s_addr = self.sock.accept() + slave = ExSocket(fd) + nparent = int(rank != 0) + nchild = 0 + if (rank + 1) * 2 - 1 < nslave: + nchild += 1 + if (rank + 1) * 2 < nslave: + nchild += 1 + try: + magic = slave.recvint() + if magic != kMagic: + print 'invalid magic number=%d from %s' % (magic, s_addr[0]) + slave.sock.close() + continue + except socket.error: + print 'sock error in %s' % (s_addr[0]) + slave.sock.close() + continue + slave.sendint(kMagic) + slave.sendint(rank) + slave.sendint(nslave) + slave.sendint(nparent) + slave.sendint(nchild) + if nparent != 0: + parent_index = (rank + 1) / 2 - 1 + ptuple = slave_addrs[parent_index] + slave.sendstr(ptuple[0]) + slave.sendint(ptuple[1]) + s_port = slave.recvint() + assert rank == len(slave_addrs) + slave_addrs.append((s_addr[0], s_port)) + slave.sock.close() + print 'finish starting rank=%d at %s' % (rank, s_addr[0]) + break + print 'all slaves setup complete' + +def mpi_submit(nslave, args): + cmd = ' '.join(['mpirun -n %d' % nslave] + args) + print cmd + return subprocess.check_call(cmd, shell = True) + +def submit(nslave, args, fun_submit = mpi_submit): + master = Master() + submit_thread = Thread(target = fun_submit, args = (nslave, args + master.slave_args())) + submit_thread.start() + master.accept_slaves(nslave) + submit_thread.join() diff --git a/src/utils.h b/src/utils.h new file mode 100644 index 000000000..2c529c449 --- /dev/null +++ b/src/utils.h @@ -0,0 +1,176 @@ +#ifndef 
ALLREDUCE_UTILS_H_ +#define ALLREDUCE_UTILS_H_ +/*! + * \file utils.h + * \brief simple utils to support the code + * \author Tianqi Chen + */ +#define _CRT_SECURE_NO_WARNINGS +#include +#include +#include +#include + +#ifndef ALLREDUCE_STRICT_CXX98_ +#include +#endif + +#if !defined(__GNUC__) +#define fopen64 std::fopen +#endif +#ifdef _MSC_VER +// NOTE: sprintf_s is not equivalent to snprintf, +// they are equivalent when success, which is sufficient for our case +#define snprintf sprintf_s +#define vsnprintf vsprintf_s +#else +#ifdef _FILE_OFFSET_BITS +#if _FILE_OFFSET_BITS == 32 +#pragma message ("Warning: FILE OFFSET BITS defined to be 32 bit") +#endif +#endif + +#ifdef __APPLE__ +#define off64_t off_t +#define fopen64 std::fopen +#endif + +extern "C" { +#include +} +#endif + +#ifdef _MSC_VER +typedef unsigned char uint8_t; +typedef unsigned short int uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long uint64_t; +typedef long int64_t; +#else +#include +#endif + +/*! \brief namespace for helper utils of the project */ +namespace utils { + +/*! \brief error message buffer length */ +const int kPrintBuffer = 1 << 12; + +#ifndef ALLREDUCE_CUSTOMIZE_MSG_ +/*! + * \brief handling of Assert error, caused by in-apropriate input + * \param msg error message + */ +inline void HandleAssertError(const char *msg) { + fprintf(stderr, "AssertError:%s\n", msg); + exit(-1); +} +/*! 
+ * \brief handling of Check error, caused by in-apropriate input + * \param msg error message + */ +inline void HandleCheckError(const char *msg) { + fprintf(stderr, "%s\n", msg); + exit(-1); +} +inline void HandlePrint(const char *msg) { + printf("%s", msg); +} +#else +#ifndef ALLREDUCE_STRICT_CXX98_ +// include declarations, some one must implement this +void HandleAssertError(const char *msg); +void HandleCheckError(const char *msg); +void HandlePrint(const char *msg); +#endif +#endif +#ifdef ALLREDUCE_STRICT_CXX98_ +// these function pointers are to be assigned +extern "C" void (*Printf)(const char *fmt, ...); +extern "C" int (*SPrintf)(char *buf, size_t size, const char *fmt, ...); +extern "C" void (*Assert)(int exp, const char *fmt, ...); +extern "C" void (*Check)(int exp, const char *fmt, ...); +extern "C" void (*Error)(const char *fmt, ...); +#else +/*! \brief printf, print message to the console */ +inline void Printf(const char *fmt, ...) { + std::string msg(kPrintBuffer, '\0'); + va_list args; + va_start(args, fmt); + vsnprintf(&msg[0], kPrintBuffer, fmt, args); + va_end(args); + HandlePrint(msg.c_str()); +} +/*! \brief portable version of snprintf */ +inline int SPrintf(char *buf, size_t size, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + int ret = vsnprintf(buf, size, fmt, args); + va_end(args); + return ret; +} + +/*! \brief assert an condition is true, use this to handle debug information */ +inline void Assert(bool exp, const char *fmt, ...) { + if (!exp) { + std::string msg(kPrintBuffer, '\0'); + va_list args; + va_start(args, fmt); + vsnprintf(&msg[0], kPrintBuffer, fmt, args); + va_end(args); + HandleAssertError(msg.c_str()); + } +} + +/*!\brief same as assert, but this is intended to be used as message for user*/ +inline void Check(bool exp, const char *fmt, ...) 
{ + if (!exp) { + std::string msg(kPrintBuffer, '\0'); + va_list args; + va_start(args, fmt); + vsnprintf(&msg[0], kPrintBuffer, fmt, args); + va_end(args); + HandleCheckError(msg.c_str()); + } +} + +/*! \brief report error message, same as check */ +inline void Error(const char *fmt, ...) { + { + std::string msg(kPrintBuffer, '\0'); + va_list args; + va_start(args, fmt); + vsnprintf(&msg[0], kPrintBuffer, fmt, args); + va_end(args); + HandleCheckError(msg.c_str()); + } +} +#endif + +/*! \brief replace fopen, report error when the file open fails */ +inline std::FILE *FopenCheck(const char *fname, const char *flag) { + std::FILE *fp = fopen64(fname, flag); + Check(fp != NULL, "can not open file \"%s\"\n", fname); + return fp; +} +} // namespace utils +// easy utils that can be directly acessed in xgboost +/*! \brief get the beginning address of a vector */ +template +inline T *BeginPtr(std::vector &vec) { + if (vec.size() == 0) { + return NULL; + } else { + return &vec[0]; + } +} +/*! \brief get the beginning address of a vector */ +template +inline const T *BeginPtr(const std::vector &vec) { + if (vec.size() == 0) { + return NULL; + } else { + return &vec[0]; + } +} +#endif // ALLREDUCE_UTILS_H_ diff --git a/submit_job_tcp.py b/submit_job_tcp.py new file mode 100755 index 000000000..d79ef53bf --- /dev/null +++ b/submit_job_tcp.py @@ -0,0 +1,36 @@ +#!/usr/bin/python +""" +This is an example script to create a customized job submit +script using xgboost sync_tcp mode +""" +import sys +import os +import subprocess +# import the tcp_master.py +# add path to sync +sys.path.append(os.path.dirname(__file__)+'/src/') +import tcp_master as master + +# +# Note: this submit script is only used for example purpose +# It does not have to be mpirun, it can be any job submission script that starts the job, qsub, hadoop streaming etc. 
+# +def mpi_submit(nslave, args): + """ + customized submit script, that submit nslave jobs, each must contain args as parameter + note this can be a lambda function containing additional parameters in input + Parameters + nslave number of slave process to start up + args arguments to launch each job + this usually includes the parameters of master_uri and parameters passed into submit + """ + cmd = ' '.join(['mpirun -n %d' % nslave] + args) + print cmd + subprocess.check_call(cmd, shell = True) + +if __name__ == '__main__': + if len(sys.argv) < 2: + print 'Usage: ' + exit(0) + # call submit, with nslave, the commands to run each job and submit function + master.submit(int(sys.argv[1]), sys.argv[2:], fun_submit= mpi_submit) diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 000000000..18f8c7481 --- /dev/null +++ b/test/Makefile @@ -0,0 +1,33 @@ +export CC = gcc +export CXX = g++ +export MPICXX = mpicxx +export LDFLAGS= -pthread -lm +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src + +ifeq ($(no_omp),1) + CFLAGS += -DDISABLE_OPENMP +else + CFLAGS += -fopenmp +endif + +# specify tensor path +BIN = test_allreduce +OBJ = engine_tcp.o +.PHONY: clean all + +all: $(BIN) $(MPIBIN) + +engine_tcp.o: ../src/engine_tcp.cpp ../src/*.h +test_allreduce: test_allreduce.cpp ../src/*.h engine_tcp.o + +$(BIN) : + $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) + +$(OBJ) : + $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) + +$(MPIBIN) : + $(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) + +clean: + $(RM) $(BIN) $(MPIBIN) *~ ../src/*~ diff --git a/test/test.sh b/test/test.sh new file mode 100755 index 000000000..5e5ef546d --- /dev/null +++ b/test/test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +if [ "$#" -ne 2 ]; +then + echo "Usage " + exit -1 +fi +../submit_job_tcp.py $1 test_allreduce $2 diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp new file mode 100644 index 000000000..407abf139 --- /dev/null 
+++ b/test/test_allreduce.cpp @@ -0,0 +1,80 @@ +#include +#include +#include +#include +#include + +using namespace sync; + +inline void TestMax(size_t n) { + int rank = sync::GetRank(); + int nproc = sync::GetWorldSize(); + + std::vector ndata(n); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % 111; + } + sync::AllReduce(&ndata[0], ndata.size()); + for (size_t i = 0; i < ndata.size(); ++i) { + float rmax = (i * 1) % 111; + for (int r = 0; r < nproc; ++r) { + rmax = std::max(rmax, (float)((i * (r+1)) % 111)); + } + utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank); + } +} + +inline void TestSum(size_t n) { + int rank = sync::GetRank(); + int nproc = sync::GetWorldSize(); + const int z = 131; + + std::vector ndata(n); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % z; + } + sync::AllReduce(&ndata[0], ndata.size()); + for (size_t i = 0; i < ndata.size(); ++i) { + float rsum = 0.0f; + for (int r = 0; r < nproc; ++r) { + rsum += (float)((i * (r+1)) % z); + } + utils::Check(fabsf(rsum - ndata[i]) < 1e-5 , + "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]); + } +} + +inline void TestBcast(size_t n, int root) { + int rank = sync::GetRank(); + std::string s; s.resize(n); + for (size_t i = 0; i < n; ++i) { + s[i] = char(i % 126 + 1); + } + std::string res; + if (root == rank) { + res = s; + sync::Bcast(&res, root); + } else { + sync::Bcast(&res, root); + } + utils::Check(res == s, "[%d] TestBcast fail", rank); +} + +int main(int argc, char *argv[]) { + if (argc < 2) { + printf("Usage: \n"); + return 0; + } + int n = atoi(argv[1]); + sync::Init(argc, argv); + int rank = sync::GetRank(); + std::string name = sync::GetProcessorName(); + printf("[%d] start at %s\n", rank, name.c_str()); + TestMax(n); + printf("[%d] TestMax pass\n", rank); + TestSum(n); + printf("[%d] TestSum pass\n", rank); + sync::Finalize(); + printf("[%d] all check pass\n", rank); + return 0; +} From 
54fcff189f00ae7a86e8a289b810d1b8e6384815 Mon Sep 17 00:00:00 2001 From: nachocano Date: Wed, 26 Nov 2014 16:37:23 -0800 Subject: [PATCH 003/531] dummy mock for now --- src/mock.h | 96 +++++++++++++++++++++++++++++++++++++++++ test/Makefile | 4 ++ test/test_allreduce.cpp | 31 +++++++++++++ 3 files changed, 131 insertions(+) create mode 100644 src/mock.h diff --git a/src/mock.h b/src/mock.h new file mode 100644 index 000000000..71e65dc0a --- /dev/null +++ b/src/mock.h @@ -0,0 +1,96 @@ +#ifndef ALLREDUCE_MOCK_H +#define ALLREDUCE_MOCK_H +/*! + * \file mock.h + * \brief This file defines a mock object to test the system + * \author Tianqi Chen, Nacho, Tianyi + */ +#include "./engine.h" +#include "./utils.h" +#include + #include + + +/*! \brief namespace of mock */ +namespace test { + +class Mock { + + typedef std::map > Map; + +public: + + Mock() : record(true) {} + + inline void Replay() { + record = false; + } + + // record methods + + inline void OnAllReduce(int rank, int code) { + utils::Check(record, "Not in record state"); + Map::iterator it = allReduce.find(rank); + if (it == allReduce.end()) { + std::queue aQueue; + allReduce[rank] = aQueue; + } + allReduce[rank].push(code); + } + + inline void OnBroadcast() { + utils::Check(record, "Not in record state"); + } + + inline void OnLoadCheckpoint() { + utils::Check(record, "Not in record state"); + } + + inline void OnCheckpoint() { + utils::Check(record, "Not in record state"); + } + + + // replay methods + + inline int AllReduce(int rank) { + utils::Check(!record, "Not in replay state"); + utils::Check(allReduce.find(rank) != allReduce.end(), "Not recorded"); + int result = 0; + if (!allReduce[rank].empty()) { + result = allReduce[rank].front(); + allReduce[rank].pop(); + } + return result; + } + + inline int Broadcast(int rank) { + utils::Check(!record, "Not in replay state"); + return 0; + } + + inline int LoadCheckpoint(int rank) { + utils::Check(!record, "Not in replay state"); + return 0; + } + + inline int 
Checkpoint(int rank) { + utils::Check(!record, "Not in replay state"); + return 0; + } + + +private: + + // flag to indicate if the mock is in record state + bool record; + + Map allReduce; + Map broadcast; + Map loadCheckpoint; + Map checkpoint; +}; + +} + +#endif // ALLREDUCE_MOCK_H diff --git a/test/Makefile b/test/Makefile index 18f8c7481..78c6095e4 100644 --- a/test/Makefile +++ b/test/Makefile @@ -9,6 +9,10 @@ ifeq ($(no_omp),1) else CFLAGS += -fopenmp endif +ifeq ($(test),1) + CFLAGS += -DTEST +endif + # specify tensor path BIN = test_allreduce diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index 407abf139..43876a4a9 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -3,6 +3,8 @@ #include #include #include +#include + using namespace sync; @@ -60,6 +62,27 @@ inline void TestBcast(size_t n, int root) { utils::Check(res == s, "[%d] TestBcast fail", rank); } +// ugly stuff, just to see if it works +inline void record(test::Mock& mock, int rank) { + switch(rank) { + case 0: + mock.OnAllReduce(0, -1); + break; + case 1: + mock.OnAllReduce(1, -1); + break; + case 2: + mock.OnAllReduce(2, 0); + break; + } +} + +// to be removed, should be added in engine tcp +inline void replay(test::Mock& mock, int rank) { + printf("[%d] All reduce %d\n", rank, mock.AllReduce(rank)); + printf("[%d] All reduce %d\n", rank, mock.AllReduce(rank)); +} + int main(int argc, char *argv[]) { if (argc < 2) { printf("Usage: \n"); @@ -69,6 +92,14 @@ int main(int argc, char *argv[]) { sync::Init(argc, argv); int rank = sync::GetRank(); std::string name = sync::GetProcessorName(); + + #ifdef TEST + test::Mock mock; + record(mock, rank); + mock.Replay(); + replay(mock, rank); + #endif + printf("[%d] start at %s\n", rank, name.c_str()); TestMax(n); printf("[%d] TestMax pass\n", rank); From c565104491ec998304c171729a9e8675bcaea335 Mon Sep 17 00:00:00 2001 From: nachocano Date: Wed, 26 Nov 2014 17:24:29 -0800 Subject: [PATCH 004/531] adding some references to 
mock inside TEST preprocessor directive. It shouldn't be an assert because it shutdowns the process. Instead should check on the value and return some sort of error, so that we can recover. The mock contains queues, indexed by the rank of the process. For each node, you can configure the behavior you expect (success or failure for now) when you call any of the methods (AllReduce, Broadcast, LoadCheckPoint and CheckPoint)... If you call several times AllReduce, the outputs will pop from the queue, i.e., first you can retrieve a success, then a failure and so on. Pretty basic for now, need to tune it better --- src/allreduce.h | 8 +++++ src/engine.h | 9 +++++ src/engine_tcp.cpp | 36 ++++++++++++++++++++ src/mock.h | 73 +++++++++++++++++++++-------------------- test/test_allreduce.cpp | 11 ++++--- 5 files changed, 98 insertions(+), 39 deletions(-) diff --git a/src/allreduce.h b/src/allreduce.h index c9bd0e579..9f150dcf4 100644 --- a/src/allreduce.h +++ b/src/allreduce.h @@ -4,6 +4,9 @@ * \author Tianqi Chen, Nacho, Tianyi */ #include "./engine.h" +#ifdef TEST + #include "./mock.h" +#endif /*! \brief namespace of all reduce */ namespace sync { @@ -43,6 +46,11 @@ void Init(int argc, char *argv[]) { void Finalize(void) { engine::Finalize(); } +#ifdef TEST +void SetMock(test::Mock& mock) { + engine::SetMock(mock); +} +#endif /*! \brief get rank of current process */ inline int GetRank(void) { return engine::GetEngine()->GetRank(); diff --git a/src/engine.h b/src/engine.h index ca928b22a..852e9187a 100644 --- a/src/engine.h +++ b/src/engine.h @@ -6,6 +6,10 @@ * \author Tianqi Chen, Nacho, Tianyi */ #include "./io.h" +#ifdef TEST + #include "./mock.h" +#endif + namespace MPI { /*! \brief MPI data type just to be compatible with MPI reduce function*/ @@ -76,5 +80,10 @@ void Init(int argc, char *argv[]); void Finalize(void); /*! 
\brief singleton method to get engine */ IEngine *GetEngine(void); + +#ifdef TEST +void SetMock(test::Mock& mock); +#endif + } // namespace engine #endif // ALLREDUCE_ENGINE_H diff --git a/src/engine_tcp.cpp b/src/engine_tcp.cpp index a0506129d..957319db4 100644 --- a/src/engine_tcp.cpp +++ b/src/engine_tcp.cpp @@ -12,6 +12,9 @@ #include #include "./engine.h" #include "./socket.h" +#ifdef TEST + #include "./mock.h" +#endif namespace MPI { class Datatype { @@ -37,6 +40,13 @@ class SyncManager : public IEngine { } ~SyncManager(void) { } + + #ifdef TEST + inline void SetMock(test::Mock& mock) { + this->mock = mock; + } + #endif + inline void Shutdown(void) { for (size_t i = 0; i < links.size(); ++i) { links[i].sock.Close(); @@ -168,6 +178,9 @@ class SyncManager : public IEngine { size_t type_nbytes, size_t count, ReduceFunction reducer) { + #ifdef TEST + utils::Assert(mock.AllReduce(this->rank), "Error returned by mock when reducing"); + #endif if (links.size() == 0) return; // total size of message const size_t total_size = type_nbytes * count; @@ -275,6 +288,10 @@ class SyncManager : public IEngine { * \param root the root worker id to broadcast the data */ virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { + #ifdef TEST + utils::Assert(mock.Broadcast(this->rank), "Error returned by mock when broadcasting"); + #endif + if (links.size() == 0) return; // number of links const int nlink = static_cast(links.size()); @@ -329,9 +346,15 @@ class SyncManager : public IEngine { } } virtual bool LoadCheckPoint(utils::ISerializable *p_model) { + #ifdef TEST + utils::Assert(mock.LoadCheckPoint(this->rank), "Error returned by mock when loading checkpoint"); + #endif return false; } virtual void CheckPoint(const utils::ISerializable &model) { + #ifdef TEST + utils::Assert(mock.CheckPoint(this->rank), "Error returned by mock when checkpointing"); + #endif } private: @@ -421,6 +444,11 @@ class SyncManager : public IEngine { std::vector links; // select helper 
utils::SelectHelper selecter; + + #ifdef TEST + // mock to test + test::Mock mock; + #endif }; // singleton sync manager @@ -436,6 +464,14 @@ void Init(int argc, char *argv[]) { } manager.Init(); } + +#ifdef TEST +/*! \brief sets a mock to the manager for testing purposes */ +void SetMock(test::Mock& mock) { + manager.SetMock(mock); +} +#endif + /*! \brief finalize syncrhonization module */ void Finalize(void) { manager.Shutdown(); diff --git a/src/mock.h b/src/mock.h index 71e65dc0a..c6bfd89fd 100644 --- a/src/mock.h +++ b/src/mock.h @@ -16,7 +16,7 @@ namespace test { class Mock { - typedef std::map > Map; + typedef std::map > Map; public: @@ -27,61 +27,64 @@ public: } // record methods - - inline void OnAllReduce(int rank, int code) { - utils::Check(record, "Not in record state"); - Map::iterator it = allReduce.find(rank); - if (it == allReduce.end()) { - std::queue aQueue; - allReduce[rank] = aQueue; - } - allReduce[rank].push(code); + inline void OnAllReduce(int rank, bool success) { + onRecord(allReduce, rank, success); } - inline void OnBroadcast() { - utils::Check(record, "Not in record state"); + inline void OnBroadcast(int rank, bool success) { + onRecord(broadcast, rank, success); } - inline void OnLoadCheckpoint() { - utils::Check(record, "Not in record state"); + inline void OnLoadCheckPoint(int rank, bool success) { + onRecord(loadCheckpoint, rank, success); } - inline void OnCheckpoint() { - utils::Check(record, "Not in record state"); + inline void OnCheckPoint(int rank, bool success) { + onRecord(checkpoint, rank, success); } // replay methods - - inline int AllReduce(int rank) { - utils::Check(!record, "Not in replay state"); - utils::Check(allReduce.find(rank) != allReduce.end(), "Not recorded"); - int result = 0; - if (!allReduce[rank].empty()) { - result = allReduce[rank].front(); - allReduce[rank].pop(); - } - return result; + inline bool AllReduce(int rank) { + return onReplay(allReduce, rank); } - inline int Broadcast(int rank) { - 
utils::Check(!record, "Not in replay state"); - return 0; + inline bool Broadcast(int rank) { + return onReplay(broadcast, rank); } - inline int LoadCheckpoint(int rank) { - utils::Check(!record, "Not in replay state"); - return 0; + inline bool LoadCheckPoint(int rank) { + return onReplay(loadCheckpoint, rank); } - inline int Checkpoint(int rank) { - utils::Check(!record, "Not in replay state"); - return 0; + inline bool CheckPoint(int rank) { + return onReplay(checkpoint, rank); } private: + inline void onRecord(Map& m, int rank, bool success) { + utils::Check(record, "Not in record state"); + Map::iterator it = m.find(rank); + if (it == m.end()) { + std::queue aQueue; + m[rank] = aQueue; + } + m[rank].push(success); + } + + inline bool onReplay(Map& m, int rank) { + utils::Check(!record, "Not in replay state"); + utils::Check(m.find(rank) != m.end(), "Not recorded"); + bool result = true; + if (!m[rank].empty()) { + result = m[rank].front(); + m[rank].pop(); + } + return result; + } + // flag to indicate if the mock is in record state bool record; diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index 43876a4a9..6da800f90 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -66,13 +66,13 @@ inline void TestBcast(size_t n, int root) { inline void record(test::Mock& mock, int rank) { switch(rank) { case 0: - mock.OnAllReduce(0, -1); + mock.OnAllReduce(0, false); break; case 1: - mock.OnAllReduce(1, -1); + mock.OnAllReduce(1, false); break; case 2: - mock.OnAllReduce(2, 0); + mock.OnAllReduce(2, true); break; } } @@ -97,9 +97,12 @@ int main(int argc, char *argv[]) { test::Mock mock; record(mock, rank); mock.Replay(); - replay(mock, rank); + //replay(mock, rank); + sync::SetMock(mock); #endif + + printf("[%d] start at %s\n", rank, name.c_str()); TestMax(n); printf("[%d] TestMax pass\n", rank); From 2f1ba40786259f2aa15e85fa320dd44962cc78ce Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 27 Nov 2014 16:17:07 -0800 Subject: [PATCH 
005/531] change in socket, to pass out error code --- src/engine_tcp.cpp | 68 ++++++++++---- src/socket.h | 223 +++++++++++++++++++++++++-------------------- 2 files changed, 173 insertions(+), 118 deletions(-) diff --git a/src/engine_tcp.cpp b/src/engine_tcp.cpp index 957319db4..e00b70f1c 100644 --- a/src/engine_tcp.cpp +++ b/src/engine_tcp.cpp @@ -76,7 +76,7 @@ class SyncManager : public IEngine { } // initialize the manager inline void Init(void) { - utils::TCPSocket::Startup(); + utils::Socket::Startup(); // single node mode if (master_uri == "NULL") return; utils::Assert(links.size() == 0, "can only call Init once"); @@ -86,7 +86,9 @@ class SyncManager : public IEngine { // get information from master utils::TCPSocket master; master.Create(); - master.Connect(utils::SockAddr(master_uri.c_str(), master_port)); + if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) { + utils::Socket::Error("Connect"); + } utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 1"); utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 2"); utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); @@ -213,7 +215,9 @@ class SyncManager : public IEngine { // read data from childs for (int i = 0; i < nlink; ++i) { if (i != parent_index && selecter.CheckRead(links[i].sock)) { - links[i].ReadToRingBuffer(size_up_out); + if (!links[i].ReadToRingBuffer(size_up_out)) { + utils::Socket::Error("Recv"); + } } } // this node have childs, peform reduce @@ -252,15 +256,25 @@ class SyncManager : public IEngine { } if (parent_index != -1) { // pass message up to parent, can pass data that are already been reduced - if (selecter.CheckWrite(links[parent_index].sock)) { - size_up_out += links[parent_index].sock. + if (selecter.CheckWrite(links[parent_index].sock)) { + ssize_t len = links[parent_index].sock. 
Send(sendrecvbuf + size_up_out, size_up_reduce - size_up_out); + if (len != -1) { + size_up_out += static_cast(len); + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) utils::Socket::Error("Recv"); + } } // read data from parent if (selecter.CheckRead(links[parent_index].sock)) { - size_down_in += links[parent_index].sock. + ssize_t len = links[parent_index].sock. Recv(sendrecvbuf + size_down_in, total_size - size_down_in); - utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error"); + if (len != -1) { + size_down_in += static_cast(len); + utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error"); + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) utils::Socket::Error("Recv"); + } } } else { // this is root, can use reduce as most recent point @@ -272,7 +286,9 @@ class SyncManager : public IEngine { for (int i = 0; i < nlink; ++i) { if (i != parent_index) { if (selecter.CheckWrite(links[i].sock)) { - links[i].WriteFromArray(sendrecvbuf, size_down_in); + if (!links[i].WriteFromArray(sendrecvbuf, size_down_in)) { + utils::Socket::Error("Send"); + } } nfinished = std::min(links[i].size_write, nfinished); } @@ -317,7 +333,9 @@ class SyncManager : public IEngine { // probe in-link for (int i = 0; i < nlink; ++i) { if (selecter.CheckRead(links[i].sock)) { - links[i].ReadToArray(sendrecvbuf_, total_size); + if (!links[i].ReadToArray(sendrecvbuf_, total_size)) { + utils::Socket::Error("Recv"); + } size_in = links[i].size_read; if (size_in != 0) { in_link = i; break; @@ -327,7 +345,9 @@ class SyncManager : public IEngine { } else { // read from in link if (in_link >= 0 && selecter.CheckRead(links[in_link].sock)) { - links[in_link].ReadToArray(sendrecvbuf_, total_size); + if(!links[in_link].ReadToArray(sendrecvbuf_, total_size)) { + utils::Socket::Error("Recv"); + } size_in = links[in_link].size_read; } } @@ -336,7 +356,9 @@ class SyncManager : public IEngine { for (int i = 0; i < nlink; ++i) { if (i != in_link) { if 
(selecter.CheckWrite(links[i].sock)) { - links[i].WriteFromArray(sendrecvbuf_, size_in); + if (!links[i].WriteFromArray(sendrecvbuf_, size_in)) { + utils::Socket::Error("Send"); + } } nfinished = std::min(nfinished, links[i].size_write); } @@ -390,32 +412,44 @@ class SyncManager : public IEngine { * position after protect_start * \param protect_start all data start from protect_start is still needed in buffer * read shall not override this + * \return true if it is an successful read, false if there is some error happens, check errno */ - inline void ReadToRingBuffer(size_t protect_start) { + inline bool ReadToRingBuffer(size_t protect_start) { size_t ngap = size_read - protect_start; utils::Assert(ngap <= buffer_size, "AllReduce: boundary check"); size_t offset = size_read % buffer_size; size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); - size_read += sock.Recv(buffer_head + offset, nmax); + ssize_t len = sock.Recv(buffer_head + offset, nmax); + if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; + size_read += static_cast(len); + return true; } /*! * \brief read data into array, * this function can not be used together with ReadToRingBuffer * a link can either read into the ring buffer, or existing array * \param max_size maximum size of array + * \return true if it is an successful read, false if there is some error happens, check errno */ - inline void ReadToArray(void *recvbuf_, size_t max_size) { + inline bool ReadToArray(void *recvbuf_, size_t max_size) { char *p = static_cast(recvbuf_); - size_read += sock.Recv(p + size_read, max_size - size_read); + ssize_t len = sock.Recv(p + size_read, max_size - size_read); + if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; + size_read += static_cast(len); + return true; } /*! 
* \brief write data in array to sock * \param sendbuf_ head of array * \param max_size maximum size of array + * \return true if it is an successful write, false if there is some error happens, check errno */ - inline void WriteFromArray(const void *sendbuf_, size_t max_size) { + inline bool WriteFromArray(const void *sendbuf_, size_t max_size) { const char *p = static_cast(sendbuf_); - size_write += sock.Send(p + size_write, max_size - size_write); + ssize_t len = sock.Send(p + size_write, max_size - size_write); + if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; + size_write += static_cast(len); + return true; } private: diff --git a/src/socket.h b/src/socket.h index a18d9a576..307ec89df 100644 --- a/src/socket.h +++ b/src/socket.h @@ -22,7 +22,6 @@ #include #include "./utils.h" -namespace utils { #if defined(_WIN32) typedef int ssize_t; typedef int sock_size_t; @@ -32,6 +31,7 @@ typedef size_t sock_size_t; const int INVALID_SOCKET = -1; #endif +namespace utils { /*! \brief data structure for network address */ struct SockAddr { sockaddr_in addr; @@ -74,36 +74,18 @@ struct SockAddr { return std::string(s); } }; + /*! - * \brief a wrapper of TCP socket that hopefully be cross platform + * \brief base class containing common operations of TCP and UDP sockets */ -class TCPSocket { +class Socket { public: /*! \brief the file descriptor of socket */ SOCKET sockfd; - // constructor - TCPSocket(void) : sockfd(INVALID_SOCKET) { - } - explicit TCPSocket(SOCKET sockfd) : sockfd(sockfd) { - } - ~TCPSocket(void) { - // do nothing in destructor - // user need to take care of close - } // default conversion to int inline operator SOCKET() const { return sockfd; } - /*! - * \brief create the socket, call this before using socket - * \param af domain - */ - inline void Create(int af = PF_INET) { - sockfd = socket(PF_INET, SOCK_STREAM, 0); - if (sockfd == INVALID_SOCKET) { - SockError("Create"); - } - } /*! 
* \brief start up the socket module * call this before using the sockets @@ -112,7 +94,7 @@ class TCPSocket { #ifdef _WIN32 WSADATA wsa_data; if (WSAStartup(MAKEWORD(2, 2), &wsa_data) != -1) { - SockError("Startup"); + Socket::Error("Startup"); } if (LOBYTE(wsa_data.wVersion) != 2 || HIBYTE(wsa_data.wVersion) != 2) { WSACleanup(); @@ -137,12 +119,12 @@ class TCPSocket { #ifdef _WIN32 u_long mode = non_block ? 1 : 0; if (ioctlsocket(sockfd, FIONBIO, &mode) != NO_ERROR) { - SockError("SetNonBlock"); + Socket::Error("SetNonBlock"); } #else int flag = fcntl(sockfd, F_GETFL, 0); if (flag == -1) { - SockError("SetNonBlock-1"); + Socket::Error("SetNonBlock-1"); } if (non_block) { flag |= O_NONBLOCK; @@ -150,10 +132,81 @@ class TCPSocket { flag &= ~O_NONBLOCK; } if (fcntl(sockfd, F_SETFL, flag) == -1) { - SockError("SetNonBlock-2"); + Socket::Error("SetNonBlock-2"); } #endif } + /*! + * \brief bind the socket to an address + * \param addr + */ + inline void Bind(const SockAddr &addr) { + if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == -1) { + Socket::Error("Bind"); + } + } + /*! + * \brief try bind the socket to host, from start_port to end_port + * \param start_port starting port number to try + * \param end_port ending port number to try + * \return the port successfully bind to, return -1 if failed to bind any port + */ + inline int TryBindHost(int start_port, int end_port) { + for (int port = start_port; port < end_port; ++port) { + SockAddr addr("0.0.0.0", port); + if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == 0) { + return port; + } + if (errno != EADDRINUSE) { + Socket::Error("TryBindHost"); + } + } + return -1; + } + /*! 
\brief close the socket */ + inline void Close(void) { + if (sockfd != INVALID_SOCKET) { +#ifdef _WIN32 + closesocket(sockfd); +#else + close(sockfd); +#endif + sockfd = INVALID_SOCKET; + } else { + Error("Socket::Close double close the socket or close without create"); + } + } + + // report an socket error + inline static void Error(const char *msg) { + int errsv = errno; + utils::Error("Socket %s Error:%s", msg, strerror(errsv)); + } + protected: + explicit Socket(SOCKET sockfd) : sockfd(sockfd) { + } +}; + +/*! + * \brief a wrapper of TCP socket that hopefully be cross platform + */ +class TCPSocket : public Socket{ + public: + // constructor + TCPSocket(void) : Socket(INVALID_SOCKET) { + } + explicit TCPSocket(SOCKET sockfd) : Socket(sockfd) { + } + /*! + * \brief create the socket, call this before using socket + * \param af domain + */ + inline void Create(int af = PF_INET) { + sockfd = socket(PF_INET, SOCK_STREAM, 0); + if (sockfd == INVALID_SOCKET) { + Socket::Error("Create"); + } + } /*! * \brief perform listen of the socket * \param backlog backlog parameter @@ -165,93 +218,43 @@ class TCPSocket { TCPSocket Accept(void) { SOCKET newfd = accept(sockfd, NULL, NULL); if (newfd == INVALID_SOCKET) { - SockError("Accept"); + Socket::Error("Accept"); } return TCPSocket(newfd); } - /*! - * \brief bind the socket to an address - * \param addr - */ - inline void Bind(const SockAddr &addr) { - if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == -1) { - SockError("Bind"); - } - } - /*! 
- * \brief try bind the socket to host, from start_port to end_port - * \param start_port starting port number to try - * \param end_port ending port number to try - * \param out_addr the binding address, if successful - * \return whether the binding is successful - */ - inline int TryBindHost(int start_port, int end_port) { - for (int port = start_port; port < end_port; ++port) { - SockAddr addr("0.0.0.0", port); - if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == 0) { - return port; - } - if (errno != EADDRINUSE) { - SockError("TryBindHost"); - } - } - return -1; - } /*! * \brief connect to an address * \param addr the address to connect to + * \return whether connect is successful */ - inline void Connect(const SockAddr &addr) { - if (connect(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == -1) { - SockError("Connect"); - } - } - /*! \brief close the connection */ - inline void Close(void) { - if (sockfd != -1) { -#ifdef _WIN32 - closesocket(sockfd); -#else - close(sockfd); -#endif - sockfd = INVALID_SOCKET; - } else { - Error("TCPSocket::Close double close the socket or close without create"); - } + inline bool Connect(const SockAddr &addr) { + return connect(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == 0; } /*! - * \brief send data using the socket + * \brief send data using the socket * \param buf the pointer to the buffer * \param len the size of the buffer * \param flags extra flags * \return size of data actually sent + * return -1 if error occurs */ - inline size_t Send(const void *buf_, size_t len, int flag = 0) { + inline ssize_t Send(const void *buf_, size_t len, int flag = 0) { const char *buf = reinterpret_cast(buf_); if (len == 0) return 0; - ssize_t ret = send(sockfd, buf, static_cast(len), flag); - if (ret == -1) { - if (errno == EAGAIN || errno == EWOULDBLOCK) return 0; - SockError("Send"); - } - return ret; - } + return send(sockfd, buf, static_cast(len), flag); + } /*! 
* \brief receive data using the socket * \param buf_ the pointer to the buffer * \param len the size of the buffer * \param flags extra flags - * \return size of data actually received + * \return size of data actually received + * return -1 if error occurs */ - inline size_t Recv(void *buf_, size_t len, int flags = 0) { + inline ssize_t Recv(void *buf_, size_t len, int flags = 0) { char *buf = reinterpret_cast(buf_); - if (len == 0) return 0; - ssize_t ret = recv(sockfd, buf, static_cast(len), flags); - if (ret == -1) { - if (errno == EAGAIN || errno == EWOULDBLOCK) return 0; - SockError("Recv"); - } - return ret; + if (len == 0) return 0; + return recv(sockfd, buf, static_cast(len), flags); } /*! * \brief peform block write that will attempt to send all data out @@ -267,7 +270,7 @@ class TCPSocket { ssize_t ret = send(sockfd, buf, static_cast(len - ndone), 0); if (ret == -1) { if (errno == EAGAIN || errno == EWOULDBLOCK) return ndone; - SockError("Recv"); + Socket::Error("SendAll"); } buf += ret; ndone += ret; @@ -288,7 +291,7 @@ class TCPSocket { ssize_t ret = recv(sockfd, buf, static_cast(len - ndone), MSG_WAITALL); if (ret == -1) { if (errno == EAGAIN || errno == EWOULDBLOCK) return ndone; - SockError("Recv"); + Socket::Error("RecvAll"); } if (ret == 0) return ndone; buf += ret; @@ -298,12 +301,8 @@ class TCPSocket { } private: - // report an socket error - inline static void SockError(const char *msg) { - int errsv = errno; - Error("Socket %s Error:%s", msg, strerror(errsv)); - } }; + /*! \brief helper data structure to perform select */ struct SelectHelper { public: @@ -326,6 +325,14 @@ struct SelectHelper { write_fds.push_back(fd); if (fd > maxfd) maxfd = fd; } + /*! + * \brief add file descriptor to watch for exception + * \param fd file descriptor to be watched + */ + inline void WatchException(SOCKET fd) { + except_fds.push_back(fd); + if (fd > maxfd) maxfd = fd; + } /*! 
* \brief Check if the descriptor is ready for read * \param fd file descriptor to check status @@ -340,12 +347,20 @@ struct SelectHelper { inline bool CheckWrite(SOCKET fd) const { return FD_ISSET(fd, &write_set) != 0; } + /*! + * \brief Check if the descriptor has any exception + * \param fd file descriptor to check status + */ + inline bool CheckExcept(SOCKET fd) const { + return FD_ISSET(fd, &except_set) != 0; + } /*! * \brief clear all the monitored descriptors */ inline void Clear(void) { read_fds.clear(); write_fds.clear(); + except_fds.clear(); maxfd = 0; } /*! @@ -356,20 +371,26 @@ struct SelectHelper { inline int Select(long timeout = 0) { FD_ZERO(&read_set); FD_ZERO(&write_set); + FD_ZERO(&except_set); for (size_t i = 0; i < read_fds.size(); ++i) { FD_SET(read_fds[i], &read_set); } for (size_t i = 0; i < write_fds.size(); ++i) { FD_SET(write_fds[i], &write_set); } + for (size_t i = 0; i < except_fds.size(); ++i) { + FD_SET(except_fds[i], &except_set); + } int ret; if (timeout == 0) { - ret = select(static_cast(maxfd + 1), &read_set, &write_set, NULL, NULL); + ret = select(static_cast(maxfd + 1), &read_set, + &write_set, &except_set, NULL); } else { timeval tm; tm.tv_usec = (timeout % 1000) * 1000; tm.tv_sec = timeout / 1000; - ret = select(static_cast(maxfd + 1), &read_set, &write_set, NULL, &tm); + ret = select(static_cast(maxfd + 1), &read_set, + &write_set, &except_set, &tm); } if (ret == -1) { int errsv = errno; @@ -380,8 +401,8 @@ struct SelectHelper { private: SOCKET maxfd; - fd_set read_set, write_set; - std::vector read_fds, write_fds; + fd_set read_set, write_set, except_set; + std::vector read_fds, write_fds, except_fds; }; } #endif From 21f3f3eec416e3846ff950e4f6173a590b8343fe Mon Sep 17 00:00:00 2001 From: nachocano Date: Thu, 27 Nov 2014 17:03:31 -0800 Subject: [PATCH 006/531] adding const to variable to comply with google code convention... may need to change more stuff though. Taint what else do you mean? Spaces, tabs, names? 
--- src/allreduce.h | 2 +- src/engine.h | 2 +- src/engine_tcp.cpp | 4 ++-- test/Makefile | 2 +- test/test_allreduce.cpp | 15 +++------------ 5 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/allreduce.h b/src/allreduce.h index 9f150dcf4..ef3fe589b 100644 --- a/src/allreduce.h +++ b/src/allreduce.h @@ -47,7 +47,7 @@ void Finalize(void) { engine::Finalize(); } #ifdef TEST -void SetMock(test::Mock& mock) { +void SetMock(const test::Mock& mock) { engine::SetMock(mock); } #endif diff --git a/src/engine.h b/src/engine.h index 852e9187a..dc3a14049 100644 --- a/src/engine.h +++ b/src/engine.h @@ -82,7 +82,7 @@ void Finalize(void); IEngine *GetEngine(void); #ifdef TEST -void SetMock(test::Mock& mock); +void SetMock(const test::Mock& mock); #endif } // namespace engine diff --git a/src/engine_tcp.cpp b/src/engine_tcp.cpp index e00b70f1c..c34d7d86b 100644 --- a/src/engine_tcp.cpp +++ b/src/engine_tcp.cpp @@ -42,7 +42,7 @@ class SyncManager : public IEngine { } #ifdef TEST - inline void SetMock(test::Mock& mock) { + inline void SetMock(const test::Mock& mock) { this->mock = mock; } #endif @@ -501,7 +501,7 @@ void Init(int argc, char *argv[]) { #ifdef TEST /*! 
\brief sets a mock to the manager for testing purposes */ -void SetMock(test::Mock& mock) { +void SetMock(const test::Mock& mock) { manager.SetMock(mock); } #endif diff --git a/test/Makefile b/test/Makefile index 78c6095e4..ef752de74 100644 --- a/test/Makefile +++ b/test/Makefile @@ -34,4 +34,4 @@ $(MPIBIN) : $(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) clean: - $(RM) $(BIN) $(MPIBIN) *~ ../src/*~ + $(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~ diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index 6da800f90..185d8ea6d 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -62,8 +62,8 @@ inline void TestBcast(size_t n, int root) { utils::Check(res == s, "[%d] TestBcast fail", rank); } -// ugly stuff, just to see if it works -inline void record(test::Mock& mock, int rank) { +// ugly stuff, just to see if it works. To be removed +inline void Record(test::Mock& mock, const int rank) { switch(rank) { case 0: mock.OnAllReduce(0, false); @@ -77,12 +77,6 @@ inline void record(test::Mock& mock, int rank) { } } -// to be removed, should be added in engine tcp -inline void replay(test::Mock& mock, int rank) { - printf("[%d] All reduce %d\n", rank, mock.AllReduce(rank)); - printf("[%d] All reduce %d\n", rank, mock.AllReduce(rank)); -} - int main(int argc, char *argv[]) { if (argc < 2) { printf("Usage: \n"); @@ -95,14 +89,11 @@ int main(int argc, char *argv[]) { #ifdef TEST test::Mock mock; - record(mock, rank); + Record(mock, rank); mock.Replay(); - //replay(mock, rank); sync::SetMock(mock); #endif - - printf("[%d] start at %s\n", rank, name.c_str()); TestMax(n); printf("[%d] TestMax pass\n", rank); From faed8285cd4846afed63e21086577c76da97be3c Mon Sep 17 00:00:00 2001 From: nachocano Date: Fri, 28 Nov 2014 00:16:35 -0800 Subject: [PATCH 007/531] execute it like ./test.sh 4 4000 testcase0.conf to obtain a successful execution updating mock. It now wraps the calls to sync and reads config from configuration file. 
I believe it's better not to use the preprocessor directive, i.e. not to put any test code in the engine_tcp. I just call the mock in the test_allreduce file. It's a file purely for testing purposes, so it's fine to use the mock there. --- .gitignore | 2 +- src/allreduce.h | 12 +-- src/config.h | 195 ++++++++++++++++++++++++++++++++++++++++ src/engine.h | 7 -- src/engine_tcp.cpp | 33 ------- src/mock.h | 102 ++++++++++----------- test/Makefile | 4 - test/test.sh | 6 +- test/test_allreduce.cpp | 44 +++------ test/testcase0.conf | 1 + test/testcase1.conf | 12 +++ 11 files changed, 274 insertions(+), 144 deletions(-) create mode 100644 src/config.h create mode 100644 test/testcase0.conf create mode 100644 test/testcase1.conf diff --git a/.gitignore b/.gitignore index 2922a01e6..f087cd689 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,4 @@ *.app *~ *.pyc -test \ No newline at end of file +test_allreduce \ No newline at end of file diff --git a/src/allreduce.h b/src/allreduce.h index ef3fe589b..264541211 100644 --- a/src/allreduce.h +++ b/src/allreduce.h @@ -1,12 +1,11 @@ +#ifndef ALLREDUCE_H +#define ALLREDUCE_H /*! * \file allreduce.h * \brief This file defines a template wrapper of engine to ensure * \author Tianqi Chen, Nacho, Tianyi */ #include "./engine.h" -#ifdef TEST - #include "./mock.h" -#endif /*! \brief namespace of all reduce */ namespace sync { @@ -46,11 +45,7 @@ void Init(int argc, char *argv[]) { void Finalize(void) { engine::Finalize(); } -#ifdef TEST -void SetMock(const test::Mock& mock) { - engine::SetMock(mock); -} -#endif + /*! 
\brief get rank of current process */ inline int GetRank(void) { return engine::GetEngine()->GetRank(); @@ -113,3 +108,4 @@ inline void CheckPoint(const utils::ISerializable &model) { engine::GetEngine()->CheckPoint(model); } } // namespace allreduce +#endif // ALLREDUCE_H diff --git a/src/config.h b/src/config.h new file mode 100644 index 000000000..45da45bdb --- /dev/null +++ b/src/config.h @@ -0,0 +1,195 @@ +#ifndef ALLREDUCE_UTILS_CONFIG_H_ +#define ALLREDUCE_UTILS_CONFIG_H_ +/*! + * \file config.h + * \brief helper class to load in configures from file + * \author Tianqi Chen + */ +#include +#include +#include +#include +#include +#include "./utils.h" + +namespace utils { +/*! + * \brief base implementation of config reader + */ +class ConfigReaderBase { + public: + /*! + * \brief get current name, called after Next returns true + * \return current parameter name + */ + inline const char *name(void) const { + return s_name; + } + /*! + * \brief get current value, called after Next returns true + * \return current parameter value + */ + inline const char *val(void) const { + return s_val; + } + /*! + * \brief move iterator to next position + * \return true if there is value in next position + */ + inline bool Next(void) { + while (!this->IsEnd()) { + GetNextToken(s_name); + if (s_name[0] == '=') return false; + if (GetNextToken( s_buf ) || s_buf[0] != '=') return false; + if (GetNextToken( s_val ) || s_val[0] == '=') return false; + return true; + } + return false; + } + // called before usage + inline void Init(void) { + ch_buf = this->GetChar(); + } + + protected: + /*! + * \brief to be implemented by subclass, + * get next token, return EOF if end of file + */ + virtual char GetChar(void) = 0; + /*! 
\brief to be implemented by child, check if end of stream */ + virtual bool IsEnd(void) = 0; + + private: + char ch_buf; + char s_name[100000], s_val[100000], s_buf[100000]; + + inline void SkipLine(void) { + do { + ch_buf = this->GetChar(); + } while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r'); + } + + inline void ParseStr(char tok[]) { + int i = 0; + while ((ch_buf = this->GetChar()) != EOF) { + switch (ch_buf) { + case '\\': tok[i++] = this->GetChar(); break; + case '\"': tok[i++] = '\0'; return; + case '\r': + case '\n': Error("ConfigReader: unterminated string"); + default: tok[i++] = ch_buf; + } + } + Error("ConfigReader: unterminated string"); + } + inline void ParseStrML(char tok[]) { + int i = 0; + while ((ch_buf = this->GetChar()) != EOF) { + switch (ch_buf) { + case '\\': tok[i++] = this->GetChar(); break; + case '\'': tok[i++] = '\0'; return; + default: tok[i++] = ch_buf; + } + } + Error("unterminated string"); + } + // return newline + inline bool GetNextToken(char tok[]) { + int i = 0; + bool new_line = false; + while (ch_buf != EOF) { + switch (ch_buf) { + case '#' : SkipLine(); new_line = true; break; + case '\"': + if (i == 0) { + ParseStr(tok); ch_buf = this->GetChar(); return new_line; + } else { + Error("ConfigReader: token followed directly by string"); + } + case '\'': + if (i == 0) { + ParseStrML( tok ); ch_buf = this->GetChar(); return new_line; + } else { + Error("ConfigReader: token followed directly by string"); + } + case '=': + if (i == 0) { + ch_buf = this->GetChar(); + tok[0] = '='; + tok[1] = '\0'; + } else { + tok[i] = '\0'; + } + return new_line; + case '\r': + case '\n': + if (i == 0) new_line = true; + case '\t': + case ' ' : + ch_buf = this->GetChar(); + if (i > 0) { + tok[i] = '\0'; + return new_line; + } + break; + default: + tok[i++] = ch_buf; + ch_buf = this->GetChar(); + break; + } + } + return true; + } +}; +/*! 
+ * \brief an iterator use stream base, allows use all types of istream + */ +class ConfigStreamReader: public ConfigReaderBase { + public: + /*! + * \brief constructor + * \param istream input stream + */ + explicit ConfigStreamReader(std::istream &fin) : fin(fin) {} + + protected: + virtual char GetChar(void) { + return fin.get(); + } + /*! \brief to be implemented by child, check if end of stream */ + virtual bool IsEnd(void) { + return fin.eof(); + } + + private: + std::istream &fin; +}; + +/*! + * \brief an iterator that iterates over a configure file and gets the configures + */ +class ConfigIterator: public ConfigStreamReader { + public: + /*! + * \brief constructor + * \param fname name of configure file + */ + explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) { + fi.open(fname); + if (fi.fail()) { + utils::Error("cannot open file %s", fname); + } + ConfigReaderBase::Init(); + } + /*! \brief destructor */ + ~ConfigIterator(void) { + fi.close(); + } + + private: + std::ifstream fi; +}; +} // namespace utils + +#endif // ALLREDUCE_UTILS_CONFIG_H_ diff --git a/src/engine.h b/src/engine.h index dc3a14049..ce8603d6f 100644 --- a/src/engine.h +++ b/src/engine.h @@ -6,9 +6,6 @@ * \author Tianqi Chen, Nacho, Tianyi */ #include "./io.h" -#ifdef TEST - #include "./mock.h" -#endif namespace MPI { @@ -81,9 +78,5 @@ void Finalize(void); /*! 
\brief singleton method to get engine */ IEngine *GetEngine(void); -#ifdef TEST -void SetMock(const test::Mock& mock); -#endif - } // namespace engine #endif // ALLREDUCE_ENGINE_H diff --git a/src/engine_tcp.cpp b/src/engine_tcp.cpp index c34d7d86b..4cbbe384f 100644 --- a/src/engine_tcp.cpp +++ b/src/engine_tcp.cpp @@ -12,9 +12,6 @@ #include #include "./engine.h" #include "./socket.h" -#ifdef TEST - #include "./mock.h" -#endif namespace MPI { class Datatype { @@ -41,12 +38,6 @@ class SyncManager : public IEngine { ~SyncManager(void) { } - #ifdef TEST - inline void SetMock(const test::Mock& mock) { - this->mock = mock; - } - #endif - inline void Shutdown(void) { for (size_t i = 0; i < links.size(); ++i) { links[i].sock.Close(); @@ -180,9 +171,6 @@ class SyncManager : public IEngine { size_t type_nbytes, size_t count, ReduceFunction reducer) { - #ifdef TEST - utils::Assert(mock.AllReduce(this->rank), "Error returned by mock when reducing"); - #endif if (links.size() == 0) return; // total size of message const size_t total_size = type_nbytes * count; @@ -304,10 +292,6 @@ class SyncManager : public IEngine { * \param root the root worker id to broadcast the data */ virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { - #ifdef TEST - utils::Assert(mock.Broadcast(this->rank), "Error returned by mock when broadcasting"); - #endif - if (links.size() == 0) return; // number of links const int nlink = static_cast(links.size()); @@ -368,15 +352,9 @@ class SyncManager : public IEngine { } } virtual bool LoadCheckPoint(utils::ISerializable *p_model) { - #ifdef TEST - utils::Assert(mock.LoadCheckPoint(this->rank), "Error returned by mock when loading checkpoint"); - #endif return false; } virtual void CheckPoint(const utils::ISerializable &model) { - #ifdef TEST - utils::Assert(mock.CheckPoint(this->rank), "Error returned by mock when checkpointing"); - #endif } private: @@ -479,10 +457,6 @@ class SyncManager : public IEngine { // select helper 
utils::SelectHelper selecter; - #ifdef TEST - // mock to test - test::Mock mock; - #endif }; // singleton sync manager @@ -499,13 +473,6 @@ void Init(int argc, char *argv[]) { manager.Init(); } -#ifdef TEST -/*! \brief sets a mock to the manager for testing purposes */ -void SetMock(const test::Mock& mock) { - manager.SetMock(mock); -} -#endif - /*! \brief finalize syncrhonization module */ void Finalize(void) { manager.Shutdown(); diff --git a/src/mock.h b/src/mock.h index c6bfd89fd..8eb5629e6 100644 --- a/src/mock.h +++ b/src/mock.h @@ -5,9 +5,8 @@ * \brief This file defines a mock object to test the system * \author Tianqi Chen, Nacho, Tianyi */ -#include "./engine.h" -#include "./utils.h" -#include +#include "./allreduce.h" +#include "./config.h" #include @@ -16,82 +15,73 @@ namespace test { class Mock { - typedef std::map > Map; public: - Mock() : record(true) {} - - inline void Replay() { - record = false; + Mock(const int& rank, char *config) : rank(rank) { + Init(config); } - // record methods - inline void OnAllReduce(int rank, bool success) { - onRecord(allReduce, rank, success); + template + inline void AllReduce(float *sendrecvbuf, size_t count) { + utils::Assert(verify(allReduce), "[%d] error when calling allReduce", rank); + sync::AllReduce(sendrecvbuf, count); } - inline void OnBroadcast(int rank, bool success) { - onRecord(broadcast, rank, success); + inline bool LoadCheckPoint(utils::ISerializable *p_model) { + utils::Assert(verify(loadCheckpoint), "[%d] error when loading checkpoint", rank); + return sync::LoadCheckPoint(p_model); } - inline void OnLoadCheckPoint(int rank, bool success) { - onRecord(loadCheckpoint, rank, success); + inline void CheckPoint(const utils::ISerializable &model) { + utils::Assert(verify(checkpoint), "[%d] error when checkpointing", rank); + sync::CheckPoint(model); } - inline void OnCheckPoint(int rank, bool success) { - onRecord(checkpoint, rank, success); + inline void Broadcast(std::string *sendrecv_data, int root) { 
+ utils::Assert(verify(broadcast), "[%d] error when broadcasting", rank); + sync::Bcast(sendrecv_data, root); + } - - // replay methods - inline bool AllReduce(int rank) { - return onReplay(allReduce, rank); - } - - inline bool Broadcast(int rank) { - return onReplay(broadcast, rank); - } - - inline bool LoadCheckPoint(int rank) { - return onReplay(loadCheckpoint, rank); - } - - inline bool CheckPoint(int rank) { - return onReplay(checkpoint, rank); - } - - private: - inline void onRecord(Map& m, int rank, bool success) { - utils::Check(record, "Not in record state"); - Map::iterator it = m.find(rank); - if (it == m.end()) { - std::queue aQueue; - m[rank] = aQueue; + inline void Init(char* config) { + utils::ConfigIterator itr(config); + while (itr.Next()) { + char round[4], node_rank[4]; + sscanf(itr.name(), "%[^_]_%s", round, node_rank); + int i_round = atoi(round); + if (i_round == 1) { + int i_node_rank = atoi(node_rank); + if (i_node_rank == rank) { + printf("[%d] round %d, value %s\n", rank, i_round, itr.val()); + if (strcmp("allreduce", itr.val())) record(allReduce); + else if (strcmp("broadcast", itr.val())) record(broadcast); + else if (strcmp("loadcheckpoint", itr.val())) record(loadCheckpoint); + else if (strcmp("checkpoint", itr.val())) record(checkpoint); + } + } } - m[rank].push(success); } - inline bool onReplay(Map& m, int rank) { - utils::Check(!record, "Not in replay state"); - utils::Check(m.find(rank) != m.end(), "Not recorded"); + inline void record(std::map& m) { + m[rank] = false; + } + + inline bool verify(std::map& m) { bool result = true; - if (!m[rank].empty()) { - result = m[rank].front(); - m[rank].pop(); + if (m.find(rank) != m.end()) { + result = m[rank]; } return result; } - // flag to indicate if the mock is in record state - bool record; - - Map allReduce; - Map broadcast; - Map loadCheckpoint; - Map checkpoint; + int rank; + std::map allReduce; + std::map broadcast; + std::map loadCheckpoint; + std::map checkpoint; }; } diff --git 
a/test/Makefile b/test/Makefile index ef752de74..c71db5f86 100644 --- a/test/Makefile +++ b/test/Makefile @@ -9,10 +9,6 @@ ifeq ($(no_omp),1) else CFLAGS += -fopenmp endif -ifeq ($(test),1) - CFLAGS += -DTEST -endif - # specify tensor path BIN = test_allreduce diff --git a/test/test.sh b/test/test.sh index 5e5ef546d..753085724 100755 --- a/test/test.sh +++ b/test/test.sh @@ -1,7 +1,7 @@ #!/bin/bash -if [ "$#" -ne 2 ]; +if [ "$#" -ne 3 ]; then - echo "Usage " + echo "Usage " exit -1 fi -../submit_job_tcp.py $1 test_allreduce $2 +../submit_job_tcp.py $1 test_allreduce $2 $3 diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index 185d8ea6d..807b1a1bb 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -8,7 +8,7 @@ using namespace sync; -inline void TestMax(size_t n) { +inline void TestMax(test::Mock &mock, size_t n) { int rank = sync::GetRank(); int nproc = sync::GetWorldSize(); @@ -16,7 +16,7 @@ inline void TestMax(size_t n) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % 111; } - sync::AllReduce(&ndata[0], ndata.size()); + mock.AllReduce(&ndata[0], ndata.size()); for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % 111; for (int r = 0; r < nproc; ++r) { @@ -26,7 +26,7 @@ inline void TestMax(size_t n) { } } -inline void TestSum(size_t n) { +inline void TestSum(test::Mock &mock, size_t n) { int rank = sync::GetRank(); int nproc = sync::GetWorldSize(); const int z = 131; @@ -35,7 +35,7 @@ inline void TestSum(size_t n) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z; } - sync::AllReduce(&ndata[0], ndata.size()); + mock.AllReduce(&ndata[0], ndata.size()); for (size_t i = 0; i < ndata.size(); ++i) { float rsum = 0.0f; for (int r = 0; r < nproc; ++r) { @@ -46,7 +46,7 @@ inline void TestSum(size_t n) { } } -inline void TestBcast(size_t n, int root) { +inline void TestBcast(test::Mock &mock, size_t n, int root) { int rank = sync::GetRank(); std::string s; s.resize(n); for 
(size_t i = 0; i < n; ++i) { @@ -55,31 +55,16 @@ inline void TestBcast(size_t n, int root) { std::string res; if (root == rank) { res = s; - sync::Bcast(&res, root); + mock.Broadcast(&res, root); } else { - sync::Bcast(&res, root); + mock.Broadcast(&res, root); } utils::Check(res == s, "[%d] TestBcast fail", rank); } -// ugly stuff, just to see if it works. To be removed -inline void Record(test::Mock& mock, const int rank) { - switch(rank) { - case 0: - mock.OnAllReduce(0, false); - break; - case 1: - mock.OnAllReduce(1, false); - break; - case 2: - mock.OnAllReduce(2, true); - break; - } -} - int main(int argc, char *argv[]) { - if (argc < 2) { - printf("Usage: \n"); + if (argc < 3) { + printf("Usage: \n"); return 0; } int n = atoi(argv[1]); @@ -87,17 +72,12 @@ int main(int argc, char *argv[]) { int rank = sync::GetRank(); std::string name = sync::GetProcessorName(); - #ifdef TEST - test::Mock mock; - Record(mock, rank); - mock.Replay(); - sync::SetMock(mock); - #endif + test::Mock mock(rank, argv[2]); printf("[%d] start at %s\n", rank, name.c_str()); - TestMax(n); + TestMax(mock, n); printf("[%d] TestMax pass\n", rank); - TestSum(n); + TestSum(mock, n); printf("[%d] TestSum pass\n", rank); sync::Finalize(); printf("[%d] all check pass\n", rank); diff --git a/test/testcase0.conf b/test/testcase0.conf new file mode 100644 index 000000000..4c324d282 --- /dev/null +++ b/test/testcase0.conf @@ -0,0 +1 @@ +# Test Case 0 -> nothing fails \ No newline at end of file diff --git a/test/testcase1.conf b/test/testcase1.conf new file mode 100644 index 000000000..f5aa31892 --- /dev/null +++ b/test/testcase1.conf @@ -0,0 +1,12 @@ +# Test Case example config +# You configure which methods should fail +# Format _ = +# can be one of the following = allreduce, broadcast, loadcheckpoint, checkpoint + +1_0 = allreduce +1_1 = broadcast +1_2 = loadcheckpoint +1_3 = checkpoint + +2_0 = allreduce +2_2 = checkpoint From a8128493c29b57924e81b2de56c6fb952de7b1c2 Mon Sep 17 00:00:00 2001 
From: nachocano Date: Fri, 28 Nov 2014 01:48:26 -0800 Subject: [PATCH 008/531] execute it like this: ./test.sh 4 4000 testcase0.conf ./ Now we are passing the folder where the round instances are saved. The problem is that calling utils::Check or utils::Assert on 1 or 2 nodes, shutdowns all of them. Only those should be shutdown and this will work. There maybe some other mechanism to shutdown a particular node. Tianqi? --- src/mock.h | 52 ++++++++++++++++++++++++++++++----------- test/test.sh | 7 +++--- test/test_allreduce.cpp | 2 +- test/testcase1.conf | 5 +--- 4 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/mock.h b/src/mock.h index 8eb5629e6..d6afd49c6 100644 --- a/src/mock.h +++ b/src/mock.h @@ -7,7 +7,9 @@ */ #include "./allreduce.h" #include "./config.h" - #include +#include +#include +#include /*! \brief namespace of mock */ @@ -18,8 +20,8 @@ class Mock { public: - Mock(const int& rank, char *config) : rank(rank) { - Init(config); + explicit Mock(const int& rank, char *config, char* round_dir) : rank(rank) { + Init(config, round_dir); } template @@ -46,20 +48,42 @@ public: private: - inline void Init(char* config) { + inline void Init(char* config, char* round_dir) { + std::stringstream ss; + ss << round_dir << "node" << rank << ".round"; + const char* round_file = ss.str().c_str(); + std::ifstream ifs(round_file); + int current_round = 1; + if (!ifs.good()) { + // file does not exists, it's the first time, so save the current round to 1 + std::ofstream ofs(round_file); + ofs << current_round; + ofs.close(); + } else { + // file does exists, read the previous round, increment by one, and save it back + ifs >> current_round; + current_round++; + ifs.close(); + std::ofstream ofs(round_file); + ofs << current_round; + ofs.close(); + } + printf("[%d] in round %d\n", rank, current_round); utils::ConfigIterator itr(config); while (itr.Next()) { char round[4], node_rank[4]; sscanf(itr.name(), "%[^_]_%s", round, node_rank); - int i_round = 
atoi(round); - if (i_round == 1) { - int i_node_rank = atoi(node_rank); - if (i_node_rank == rank) { - printf("[%d] round %d, value %s\n", rank, i_round, itr.val()); - if (strcmp("allreduce", itr.val())) record(allReduce); - else if (strcmp("broadcast", itr.val())) record(broadcast); - else if (strcmp("loadcheckpoint", itr.val())) record(loadCheckpoint); - else if (strcmp("checkpoint", itr.val())) record(checkpoint); + int i_node_rank = atoi(node_rank); + // if it's something for me + if (i_node_rank == rank) { + int i_round = atoi(round); + // in my current round + if (i_round == current_round) { + printf("[%d] round %d, value %s\n", rank, i_round, itr.val()); + if (strcmp("allreduce", itr.val())) record(allReduce); + else if (strcmp("broadcast", itr.val())) record(broadcast); + else if (strcmp("loadcheckpoint", itr.val())) record(loadCheckpoint); + else if (strcmp("checkpoint", itr.val())) record(checkpoint); } } } @@ -82,6 +106,8 @@ private: std::map broadcast; std::map loadCheckpoint; std::map checkpoint; + + }; } diff --git a/test/test.sh b/test/test.sh index 753085724..5c70404ac 100755 --- a/test/test.sh +++ b/test/test.sh @@ -1,7 +1,8 @@ #!/bin/bash -if [ "$#" -ne 3 ]; +if [ "$#" -ne 4 ]; then - echo "Usage " + echo "Usage " exit -1 fi -../submit_job_tcp.py $1 test_allreduce $2 $3 + +../submit_job_tcp.py $1 test_allreduce $2 $3 $4 \ No newline at end of file diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index 807b1a1bb..9afdc6d03 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -72,7 +72,7 @@ int main(int argc, char *argv[]) { int rank = sync::GetRank(); std::string name = sync::GetProcessorName(); - test::Mock mock(rank, argv[2]); + test::Mock mock(rank, argv[2], argv[3]); printf("[%d] start at %s\n", rank, name.c_str()); TestMax(mock, n); diff --git a/test/testcase1.conf b/test/testcase1.conf index f5aa31892..cc9bd662c 100644 --- a/test/testcase1.conf +++ b/test/testcase1.conf @@ -5,8 +5,5 @@ 1_0 = allreduce 1_1 = 
broadcast -1_2 = loadcheckpoint -1_3 = checkpoint -2_0 = allreduce -2_2 = checkpoint +2_2 = allreduce From a30075794be42bcaab18a7f56efab652c42f5a89 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 28 Nov 2014 15:56:12 -0800 Subject: [PATCH 009/531] initial version of robust engine, add discard link, need more random mock test, next milestone will be recovery --- src/engine.h | 2 +- src/engine_robust.cpp | 657 ++++++++++++++++++++++++++++++++++++++++++ src/socket.h | 23 +- test/Makefile | 5 +- 4 files changed, 677 insertions(+), 10 deletions(-) create mode 100644 src/engine_robust.cpp diff --git a/src/engine.h b/src/engine.h index ce8603d6f..510f3aabd 100644 --- a/src/engine.h +++ b/src/engine.h @@ -36,7 +36,7 @@ class IEngine { * \brief perform in-place allreduce, on sendrecvbuf * this function is NOT thread-safe * \param sendrecvbuf_ buffer for both sending and recving data - * \param type_n4bytes the unit number of bytes the type have + * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function */ diff --git a/src/engine_robust.cpp b/src/engine_robust.cpp new file mode 100644 index 000000000..ad2ff34da --- /dev/null +++ b/src/engine_robust.cpp @@ -0,0 +1,657 @@ +/*! + * \file engine_robust.cpp + * \brief Robust implementation of AllReduce + * using TCP non-block socket and tree-shape reduction. + * + * This implementation considers the failure of nodes + * + * \author Tianqi, Nacho, Tianyi + */ +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE +#define NOMINMAX +#include +#include +#include +#include "./utils.h" +#include "./engine.h" +#include "./socket.h" + +namespace MPI { +// MPI data type to be compatible with existing MPI interface +class Datatype { + public: + size_t type_size; + Datatype(size_t type_size) : type_size(type_size) {} +}; +} + +namespace engine { +/*! 
\brief implementation of fault tolerant all reduce engine */ +class AllReduceManager : public IEngine { + public: + // magic number to verify server + const static int kMagic = 0xff99; + // constant one byte out of band message to indicate error happening + // and mark for channel cleanup + const static char kOOBReset = 95; + + AllReduceManager(void) { + master_uri = "NULL"; + master_port = 9000; + host_uri = ""; + slave_port = 9010; + nport_trial = 1000; + rank = 0; + world_size = 1; + this->SetParam("reduce_buffer", "256MB"); + } + ~AllReduceManager(void) { + } + inline void Shutdown(void) { + for (size_t i = 0; i < links.size(); ++i) { + links[i].sock.Close(); + } + links.clear(); + utils::TCPSocket::Finalize(); + } + /*! \brief set parameters to the sync manager */ + inline void SetParam(const char *name, const char *val) { + if (!strcmp(name, "master_uri")) master_uri = val; + if (!strcmp(name, "master_port")) master_port = atoi(val); + if (!strcmp(name, "reduce_buffer")) { + char unit; + unsigned long amount; + if (sscanf(val, "%lu%c", &amount, &unit) == 2) { + switch (unit) { + case 'B': reduce_buffer_size = (amount + 7)/ 8; break; + case 'K': reduce_buffer_size = amount << 7UL; break; + case 'M': reduce_buffer_size = amount << 17UL; break; + case 'G': reduce_buffer_size = amount << 27UL; break; + default: utils::Error("invalid format for reduce buffer"); + } + } else { + utils::Error("invalid format for reduce_buffer, shhould be {integer}{unit}, unit can be {B, KB, MB, GB}"); + } + } + } + // initialize the manager + inline void Init(void) { + utils::Socket::Startup(); + // single node mode + if (master_uri == "NULL") return; + utils::Assert(links.size() == 0, "can only call Init once"); + int magic = kMagic; + int nchild = 0, nparent = 0; + this->host_uri = utils::SockAddr::GetHostName(); + // get information from master + utils::TCPSocket master; + master.Create(); + if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) { + 
utils::Socket::Error("Connect"); + } + utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 1"); + utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 2"); + utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); + utils::Assert(master.RecvAll(&rank, sizeof(rank)) == sizeof(rank), "sync::Init failure 3"); + utils::Assert(master.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), "sync::Init failure 4"); + utils::Assert(master.RecvAll(&nparent, sizeof(nparent)) == sizeof(nparent), "sync::Init failure 5"); + utils::Assert(master.RecvAll(&nchild, sizeof(nchild)) == sizeof(nchild), "sync::Init failure 6"); + utils::Assert(nchild >= 0, "in correct number of childs"); + utils::Assert(nparent == 1 || nparent == 0, "in correct number of parent"); + + // create listen + utils::TCPSocket sock_listen; + sock_listen.Create(); + int port = sock_listen.TryBindHost(slave_port, slave_port + nport_trial); + utils::Check(port != -1, "sync::Init fail to bind the ports specified"); + sock_listen.Listen(); + + if (nparent != 0) { + parent_index = 0; + links.push_back(LinkRecord()); + int len, hport; + std::string hname; + utils::Assert(master.RecvAll(&len, sizeof(len)) == sizeof(len), "sync::Init failure 9"); + hname.resize(len); + utils::Assert(len != 0, "string must not be empty"); + utils::Assert(master.RecvAll(&hname[0], len) == static_cast(len), "sync::Init failure 10"); + utils::Assert(master.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "sync::Init failure 11"); + links[0].sock.Create(); + links[0].sock.Connect(utils::SockAddr(hname.c_str(), hport)); + utils::Assert(links[0].sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 12"); + utils::Assert(links[0].sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 13"); + utils::Check(magic == kMagic, "sync::Init failure, parent magic number mismatch"); + parent_index = 0; + } else 
{ + parent_index = -1; + } + // send back socket listening port to master + utils::Assert(master.SendAll(&port, sizeof(port)) == sizeof(port), "sync::Init failure 14"); + // close connection to master + master.Close(); + // accept links from childs + for (int i = 0; i < nchild; ++i) { + LinkRecord r; + while (true) { + r.sock = sock_listen.Accept(); + if (r.sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic) && magic == kMagic) { + utils::Assert(r.sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 15"); + break; + } else { + // not a valid child + r.sock.Close(); + } + } + links.push_back(r); + } + // close listening sockets + sock_listen.Close(); + // setup selecter + selecter.Clear(); + for (size_t i = 0; i < links.size(); ++i) { + // set the socket to non-blocking mode + links[i].sock.SetNonBlock(true); + selecter.WatchRead(links[i].sock); + selecter.WatchWrite(links[i].sock); + selecter.WatchException(links[i].sock); + } + // done + } + /*! \brief get rank */ + virtual int GetRank(void) const { + return rank; + } + /*! \brief get rank */ + virtual int GetWorldSize(void) const { + return world_size; + } + /*! \brief get rank */ + virtual std::string GetHost(void) const { + return host_uri; + } + virtual void AllReduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer) { + while (true) { + if (rank == rand() % 3) TryResetLinks(); + ReturnType ret = TryAllReduce(sendrecvbuf_, type_nbytes, count, reducer); + if (ret == kSuccess) return; + if (ret == kSockError) { + utils::Error("error occur during all reduce\n"); + } + utils::Check(TryResetLinks() == kSuccess, "error when reset links"); + } + } + /*! 
+ * \brief broadcast data from root to all nodes + * \param sendrecvbuf_ buffer for both sending and recving data + * \param size the size of the data to be broadcasted + * \param root the root worker id to broadcast the data + */ + virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { + if (links.size() == 0) return; + // number of links + const int nlink = static_cast(links.size()); + // size of space already read from data + size_t size_in = 0; + // input link, -2 means unknown yet, -1 means this is root + int in_link = -2; + + // initialize the link statistics + for (int i = 0; i < nlink; ++i) { + links[i].ResetSize(); + } + // root have all the data + if (this->rank == root) { + size_in = total_size; + in_link = -1; + } + + // while we have not passed the messages out + while(true) { + selecter.Select(); + if (in_link == -2) { + // probe in-link + for (int i = 0; i < nlink; ++i) { + if (selecter.CheckRead(links[i].sock)) { + if (!links[i].ReadToArray(sendrecvbuf_, total_size)) { + utils::Socket::Error("Recv"); + } + size_in = links[i].size_read; + if (size_in != 0) { + in_link = i; break; + } + } + } + } else { + // read from in link + if (in_link >= 0 && selecter.CheckRead(links[in_link].sock)) { + if(!links[in_link].ReadToArray(sendrecvbuf_, total_size)) { + utils::Socket::Error("Recv"); + } + size_in = links[in_link].size_read; + } + } + size_t nfinished = total_size; + // send data to all out-link + for (int i = 0; i < nlink; ++i) { + if (i != in_link) { + if (selecter.CheckWrite(links[i].sock)) { + if (!links[i].WriteFromArray(sendrecvbuf_, size_in)) { + utils::Socket::Error("Send"); + } + } + nfinished = std::min(nfinished, links[i].size_write); + } + } + // check boundary condition + if (nfinished >= total_size) break; + } + } + virtual bool LoadCheckPoint(utils::ISerializable *p_model) { + return false; + } + virtual void CheckPoint(const utils::ISerializable &model) { + } + + protected: + // possible returning type from the Try 
Functions + enum ReturnType { + kSuccess, + kSockError, + kGetExcept + }; + // possible state of the server + enum ServerState { + kNormal, + kConnDrop, + kRecover + }; + // cleanup the links, by sending OOB message + inline ReturnType TryResetLinks(void) { + // number of links + const int nlink = static_cast(links.size()); + for (int i = 0; i < nlink; ++i) { + links[i].InitBuffer(sizeof(int), 1 << 10, reduce_buffer_size); + links[i].ResetSize(); + } + printf("[%d] start to reset link\n", rank); + while (true) { + if (selecter.Select() == -1) { + if (errno == EBADF || errno == EINTR) return kSockError; + utils::Socket::Error("select"); + } + bool finished = true; + for (int i = 0; i < nlink; ++i) { + if (selecter.CheckWrite(links[i].sock)) { + if (links[i].size_write == 0) { + char sig = kOOBReset; + ssize_t len = links[i].sock.Send(&sig, sizeof(sig), MSG_OOB); + if (len != -1) { + links[i].size_write += len; + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + } + } + } + // need to send OOB to every other link + if (links[i].size_write == 0) finished = false; + // need to receive OOB from every link, or already cleanup some link + if (!links[i].oob_clear && !selecter.CheckExcept(links[i].sock)) finished = false; + } + if (finished) break; + } + printf("[%d] start to discard link\n", rank); + // read and discard data from all channels until pass mark + while (true) { + if (selecter.Select() == -1) { + if (errno == EBADF || errno == EINTR) return kSockError; + utils::Socket::Error("select"); + } + bool finished = true; + for (int i = 0; i < nlink; ++i) { + if (selecter.CheckExcept(links[i].sock)) { + int atmark = links[i].sock.AtMark(); + if (atmark < 0) return kSockError; + if (atmark == 1) { + char oob_msg; + ssize_t len = links[i].sock.Recv(&oob_msg, sizeof(oob_msg), MSG_OOB); + if (len == -1 && errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + utils::Assert(oob_msg == kOOBReset, "wrong oob msg"); + } else { + ssize_t len = 
links[i].sock.Recv(links[i].buffer_head, links[i].buffer_size); + if (len == -1) { + // when error happens here, oob_clear will remember + if (errno == EAGAIN && errno == EWOULDBLOCK) printf("would block\n"); + } else { + printf("[%d] discard %ld bytes\n", rank, len); + } + // the existing exception already cleared by this loop + if (len == -1 && errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + } + finished = false; + } else { + links[i].oob_clear = true; + } + } + if (finished) break; + } + // mark oob_clear mark as false + for (int i = 0; i < nlink; ++i) { + links[i].oob_clear = false; + } + return kSuccess; + } + // Run AllReduce, return if success + inline ReturnType TryAllReduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer) { + if (links.size() == 0) return kSuccess; + // total size of message + const size_t total_size = type_nbytes * count; + // number of links + const int nlink = static_cast(links.size()); + // send recv buffer + char *sendrecvbuf = reinterpret_cast(sendrecvbuf_); + // size of space that we already performs reduce in up pass + size_t size_up_reduce = 0; + // size of space that we have already passed to parent + size_t size_up_out = 0; + // size of message we received, and send in the down pass + size_t size_down_in = 0; + // initialize the link ring-buffer and pointer + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + links[i].InitBuffer(type_nbytes, count, reduce_buffer_size); + } + links[i].ResetSize(); + } + // if no childs, no need to reduce + if (nlink == static_cast(parent_index != -1)) { + size_up_reduce = total_size; + } + + // while we have not passed the messages out + while (true) { + if (selecter.Select() == -1) { + if (errno == EBADF || errno == EINTR) return kSockError; + utils::Socket::Error("select"); + } + // exception handling + for (int i = 0; i < nlink; ++i) { + // recive OOB message from some link + if (selecter.CheckExcept(links[i].sock)) return 
kGetExcept; + } + // read data from childs + for (int i = 0; i < nlink; ++i) { + if (i != parent_index && selecter.CheckRead(links[i].sock)) { + if (!links[i].ReadToRingBuffer(size_up_out)) return kSockError; + } + } + // this node have childs, peform reduce + if (nlink > static_cast(parent_index != -1)) { + size_t buffer_size = 0; + // do upstream reduce + size_t max_reduce = total_size; + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + max_reduce= std::min(max_reduce, links[i].size_read); + utils::Assert(buffer_size == 0 || buffer_size == links[i].buffer_size, + "buffer size inconsistent"); + buffer_size = links[i].buffer_size; + } + } + utils::Assert(buffer_size != 0, "must assign buffer_size"); + // round to type_n4bytes + max_reduce = (max_reduce / type_nbytes * type_nbytes); + // peform reduce, can be at most two rounds + while (size_up_reduce < max_reduce) { + // start position + size_t start = size_up_reduce % buffer_size; + // peform read till end of buffer + size_t nread = std::min(buffer_size - start, max_reduce - size_up_reduce); + utils::Assert(nread % type_nbytes == 0, "AllReduce: size check"); + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + reducer(links[i].buffer_head + start, + sendrecvbuf + size_up_reduce, + static_cast(nread / type_nbytes), + MPI::Datatype(type_nbytes)); + } + } + size_up_reduce += nread; + } + } + if (parent_index != -1) { + // pass message up to parent, can pass data that are already been reduced + if (selecter.CheckWrite(links[parent_index].sock)) { + ssize_t len = links[parent_index].sock. + Send(sendrecvbuf + size_up_out, size_up_reduce - size_up_out); + if (len != -1) { + size_up_out += static_cast(len); + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + } + } + // read data from parent + if (selecter.CheckRead(links[parent_index].sock)) { + ssize_t len = links[parent_index].sock. 
+ Recv(sendrecvbuf + size_down_in, total_size - size_down_in); + if (len != -1) { + size_down_in += static_cast(len); + utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error"); + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + } + } + } else { + // this is root, can use reduce as most recent point + size_down_in = size_up_out = size_up_reduce; + } + // check if we finished the job of message passing + size_t nfinished = size_down_in; + // can pass message down to childs + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + if (selecter.CheckWrite(links[i].sock)) { + if (!links[i].WriteFromArray(sendrecvbuf, size_down_in)) return kSockError; + } + nfinished = std::min(links[i].size_write, nfinished); + } + } + // check boundary condition + if (nfinished >= total_size) break; + } + return kSuccess; + } + + private: + // link record to a neighbor + struct LinkRecord { + public: + // socket to get data from/to link + utils::TCPSocket sock; + // size of data readed from link + size_t size_read; + // size of data sent to the link + size_t size_write; + // pointer to buffer head + char *buffer_head; + // buffer size, in bytes + size_t buffer_size; + // state used by TryResetLinks, whether a link is already cleaned from OOB mark + bool oob_clear; + // constructor + LinkRecord(void) : oob_clear(false) {} + // initialize buffer + inline void InitBuffer(size_t type_nbytes, size_t count, size_t reduce_buffer_size) { + size_t n = (type_nbytes * count + 7)/ 8; + buffer_.resize(std::min(reduce_buffer_size, n)); + // make sure align to type_nbytes + buffer_size = buffer_.size() * sizeof(uint64_t) / type_nbytes * type_nbytes; + utils::Assert(type_nbytes <= buffer_size, "too large type_nbytes=%lu, buffer_size=%lu", type_nbytes, buffer_size); + // set buffer head + buffer_head = reinterpret_cast(BeginPtr(buffer_)); + } + // reset the recv and sent size + inline void ResetSize(void) { + size_write = size_read = 0; + } + /*! 
+ * \brief read data into ring-buffer, with care not to existing useful override data + * position after protect_start + * \param protect_start all data start from protect_start is still needed in buffer + * read shall not override this + * \return true if it is an successful read, false if there is some error happens, check errno + */ + inline bool ReadToRingBuffer(size_t protect_start) { + size_t ngap = size_read - protect_start; + utils::Assert(ngap <= buffer_size, "AllReduce: boundary check"); + size_t offset = size_read % buffer_size; + size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); + ssize_t len = sock.Recv(buffer_head + offset, nmax); + if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; + size_read += static_cast(len); + return true; + } + /*! + * \brief read data into array, + * this function can not be used together with ReadToRingBuffer + * a link can either read into the ring buffer, or existing array + * \param max_size maximum size of array + * \return true if it is an successful read, false if there is some error happens, check errno + */ + inline bool ReadToArray(void *recvbuf_, size_t max_size) { + char *p = static_cast(recvbuf_); + ssize_t len = sock.Recv(p + size_read, max_size - size_read); + if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; + size_read += static_cast(len); + return true; + } + /*! 
+ * \brief write data in array to sock + * \param sendbuf_ head of array + * \param max_size maximum size of array + * \return true if it is an successful write, false if there is some error happens, check errno + */ + inline bool WriteFromArray(const void *sendbuf_, size_t max_size) { + const char *p = static_cast(sendbuf_); + ssize_t len = sock.Send(p + size_write, max_size - size_write); + if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; + size_write += static_cast(len); + return true; + } + + private: + // recv buffer to get data from child + // aligned with 64 bits, will be able to perform 64 bits operations freely + std::vector buffer_; + }; + // data structure to remember result of Bcast and AllReduce calls + class ResultBuffer { + public: + // constructor + ResultBuffer(void) { + this->Clear(); + } + // clear the existing record + inline void Clear(void) { + seqno_.clear(); size_.clear(); + rptr_.clear(); rptr_.push_back(0); + data_.clear(); + } + // allocate temporal space for + inline void *AllocTemp(size_t type_nbytes, size_t count) { + size_t size = type_nbytes * count; + size_t nhop = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); + utils::Assert(nhop != 0, "cannot allocate 0 size memory"); + data_.resize(rptr_.back() + nhop); + return BeginPtr(data_) + rptr_.back(); + } + // push the result in temp to the + inline void PushTemp(int seqid, size_t type_nbytes, size_t count) { + size_t size = type_nbytes * count; + size_t nhop = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); + if (seqno_.size() != 0) { + utils::Assert(seqno_.back() < seqid, "PushTemp seqid inconsistent"); + } + seqno_.push_back(seqid); + rptr_.push_back(rptr_.back() + nhop); + size_.push_back(size); + utils::Assert(data_.size() == rptr_.back(), "PushTemp inconsistent"); + } + // return the stored result of seqid, if any + inline void* Query(int seqid, size_t *p_size) { + size_t idx = std::lower_bound(seqno_.begin(), seqno_.end(), seqid) - seqno_.begin(); + if (idx == 
seqno_.size() || seqno_[idx] != seqid) return NULL; + *p_size = size_[idx]; + return BeginPtr(data_) + rptr_[idx]; + } + private: + // sequence number of each + std::vector seqno_; + // pointer to the positions + std::vector rptr_; + // actual size of each buffer + std::vector size_; + // content of the buffer + std::vector data_; + }; + //---- recovery data structure ---- + // call sequence counter, records how many calls we made so far + // from last call to CheckPoint, LoadCheckPoint + int seq_counter; + // result buffer + ResultBuffer resbuf; + // model that is saved from last CheckPoint + std::string check_point; + //---- local data related to link ---- + // index of parent link, can be -1, meaning this is root of the tree + int parent_index; + // sockets of all links + std::vector links; + // select helper + utils::SelectHelper selecter; + //----- meta information----- + // uri of current host, to be set by Init + std::string host_uri; + // uri of master + std::string master_uri; + // port of master address + int master_port; + // port of slave process + int slave_port, nport_trial; + // reduce buffer size + size_t reduce_buffer_size; + // current rank + int rank; + // world size + int world_size; +}; + +// singleton sync manager +AllReduceManager manager; + +/*! \brief intiialize the synchronization module */ +void Init(int argc, char *argv[]) { + for (int i = 1; i < argc; ++i) { + char name[256], val[256]; + if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { + manager.SetParam(name, val); + } + } + manager.Init(); +} + +/*! \brief finalize syncrhonization module */ +void Finalize(void) { + manager.Shutdown(); +} +/*! 
\brief singleton method to get engine */ +IEngine *GetEngine(void) { + return &manager; +} +} // namespace engine diff --git a/src/socket.h b/src/socket.h index 307ec89df..9cbf1bcea 100644 --- a/src/socket.h +++ b/src/socket.h @@ -17,6 +17,7 @@ #include #include #include +#include #endif #include #include @@ -222,6 +223,19 @@ class TCPSocket : public Socket{ } return TCPSocket(newfd); } + /*! + * \brief decide whether the socket is at OOB mark + * \return 1 if at mark, 0 if not, -1 if an error occured + */ + inline int AtMark(void) const { + int atmark; +#ifdef _WIN32 + if (ioctlsocket(sockfd, SIOCATMARK, &atmark) != NO_ERROR) return -1; +#else + if (ioctl(sockfd, SIOCATMARK, &atmark) == -1) return -1; +#endif + return atmark; + } /*! * \brief connect to an address * \param addr the address to connect to @@ -299,8 +313,6 @@ class TCPSocket : public Socket{ } return ndone; } - - private: }; /*! \brief helper data structure to perform select */ @@ -366,7 +378,8 @@ struct SelectHelper { /*! 
* \brief peform select on the set defined * \param timeout specify timeout in micro-seconds(ms) if equals 0, means select will always block - * \return number of active descriptors selected + * \return number of active descriptors selected, + * return -1 if error occurs */ inline int Select(long timeout = 0) { FD_ZERO(&read_set); @@ -392,10 +405,6 @@ struct SelectHelper { ret = select(static_cast(maxfd + 1), &read_set, &write_set, &except_set, &tm); } - if (ret == -1) { - int errsv = errno; - Error("Select Error: %s", strerror(errsv)); - } return ret; } diff --git a/test/Makefile b/test/Makefile index c71db5f86..c773fe45b 100644 --- a/test/Makefile +++ b/test/Makefile @@ -12,13 +12,14 @@ endif # specify tensor path BIN = test_allreduce -OBJ = engine_tcp.o +OBJ = engine_robust.o engine_tcp.o .PHONY: clean all all: $(BIN) $(MPIBIN) engine_tcp.o: ../src/engine_tcp.cpp ../src/*.h -test_allreduce: test_allreduce.cpp ../src/*.h engine_tcp.o +engine_robust.o: ../src/engine_robust.cpp ../src/*.h +test_allreduce: test_allreduce.cpp ../src/*.h engine_robust.o $(BIN) : $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) From aa54a038f2c1979081a3c1e3030ed048ee89cdba Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 28 Nov 2014 21:56:58 -0800 Subject: [PATCH 010/531] livelock in oob send recv --- src/engine_robust.cpp | 169 +++++++++++++++++++++++++++++------------- src/socket.h | 93 ++++++++++++++--------- 2 files changed, 173 insertions(+), 89 deletions(-) diff --git a/src/engine_robust.cpp b/src/engine_robust.cpp index ad2ff34da..9ed2a31fb 100644 --- a/src/engine_robust.cpp +++ b/src/engine_robust.cpp @@ -35,6 +35,8 @@ class AllReduceManager : public IEngine { // constant one byte out of band message to indicate error happening // and mark for channel cleanup const static char kOOBReset = 95; + // and mark for channel cleanup + const static char kOOBResetAck = 97; AllReduceManager(void) { master_uri = "NULL"; @@ -148,13 +150,9 @@ class AllReduceManager : public 
IEngine { // close listening sockets sock_listen.Close(); // setup selecter - selecter.Clear(); for (size_t i = 0; i < links.size(); ++i) { // set the socket to non-blocking mode links[i].sock.SetNonBlock(true); - selecter.WatchRead(links[i].sock); - selecter.WatchWrite(links[i].sock); - selecter.WatchException(links[i].sock); } // done } @@ -211,7 +209,13 @@ class AllReduceManager : public IEngine { // while we have not passed the messages out while(true) { - selecter.Select(); + // select helper + utils::SelectHelper selecter; + for (size_t i = 0; i < links.size(); ++i) { + selecter.WatchRead(links[i].sock); + selecter.WatchWrite(links[i].sock); + selecter.WatchException(links[i].sock); + } if (in_link == -2) { // probe in-link for (int i = 0; i < nlink; ++i) { @@ -277,71 +281,118 @@ class AllReduceManager : public IEngine { links[i].InitBuffer(sizeof(int), 1 << 10, reduce_buffer_size); links[i].ResetSize(); } - printf("[%d] start to reset link\n", rank); + printf("[%d] start to reset link\n", rank); while (true) { - if (selecter.Select() == -1) { - if (errno == EBADF || errno == EINTR) return kSockError; - utils::Socket::Error("select"); - } + printf("[%d] loop\n", rank); bool finished = true; for (int i = 0; i < nlink; ++i) { - if (selecter.CheckWrite(links[i].sock)) { - if (links[i].size_write == 0) { - char sig = kOOBReset; - ssize_t len = links[i].sock.Send(&sig, sizeof(sig), MSG_OOB); - if (len != -1) { - links[i].size_write += len; - } else { - if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; - } + if (links[i].sock.BadSocket()) continue; + if (links[i].size_write == 0) { + char sig = kOOBReset; + ssize_t len = links[i].sock.Send(&sig, sizeof(sig), MSG_OOB); + // error will be filtered in next loop + if (len != -1) { + links[i].size_write += len; + printf("[%d] send OOB success\n", rank); } } // need to send OOB to every other link if (links[i].size_write == 0) finished = false; - // need to receive OOB from every link, or already cleanup 
some link - if (!links[i].oob_clear && !selecter.CheckExcept(links[i].sock)) finished = false; } if (finished) break; } + printf("[%d] finish send all OOB\n", rank); + // wait for incoming except from all links + for (int i = 0; i < nlink; ++ i) { + if (links[i].sock.BadSocket()) continue; + printf("[%d] wait except\n", rank); + if (utils::SelectHelper::WaitExcept(links[i].sock) == -1) { + utils::Socket::Error("select"); + } + printf("[%d] finish wait except\n", rank); + } printf("[%d] start to discard link\n", rank); // read and discard data from all channels until pass mark while (true) { - if (selecter.Select() == -1) { - if (errno == EBADF || errno == EINTR) return kSockError; - utils::Socket::Error("select"); - } + utils::SelectHelper rsel; bool finished = true; for (int i = 0; i < nlink; ++i) { - if (selecter.CheckExcept(links[i].sock)) { + if (links[i].sock.BadSocket()) continue; + if (links[i].size_read == 0) { int atmark = links[i].sock.AtMark(); if (atmark < 0) return kSockError; if (atmark == 1) { char oob_msg; ssize_t len = links[i].sock.Recv(&oob_msg, sizeof(oob_msg), MSG_OOB); - if (len == -1 && errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; - utils::Assert(oob_msg == kOOBReset, "wrong oob msg"); - } else { - ssize_t len = links[i].sock.Recv(links[i].buffer_head, links[i].buffer_size); - if (len == -1) { - // when error happens here, oob_clear will remember - if (errno == EAGAIN && errno == EWOULDBLOCK) printf("would block\n"); - } else { - printf("[%d] discard %ld bytes\n", rank, len); + if (len == -1 && errno != EAGAIN && errno != EWOULDBLOCK) { + finished = false; continue; } - // the existing exception already cleared by this loop - if (len == -1 && errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + utils::Assert(oob_msg == kOOBReset, "wrong oob msg"); + links[i].size_read = 1; + } else { + finished = false; + rsel.WatchRead(links[i].sock); } - finished = false; - } else { - links[i].oob_clear = true; } } if (finished) 
break; + // wait to read from the channels to discard data + rsel.Select(); + printf("[%d] select finish read from\n", rank); + for (int i = 0; i < nlink; ++i) { + if (links[i].sock.BadSocket()) continue; + if (rsel.CheckRead(links[i].sock)) { + ssize_t len = links[i].sock.Recv(links[i].buffer_head, links[i].buffer_size); + // zero length, remote closed the connection, close socket + if (len == 0) { + links[i].sock.Close(); + } else if (len == -1) { + // when error happens here, oob_clear will remember + if (errno == EAGAIN && errno == EWOULDBLOCK) printf("would block\n"); + } else { + printf("[%d] discard %ld bytes\n", rank, len); + } + } + } } - // mark oob_clear mark as false + printf("[%d] discard all success\n", rank); + // start synchronization step for (int i = 0; i < nlink; ++i) { - links[i].oob_clear = false; + links[i].ResetSize(); } + while (true) { + // selecter for TryResetLinks + utils::SelectHelper rsel; + for (int i = 0; i < nlink; ++i) { + if (links[i].sock.BadSocket()) continue; + if (links[i].size_read == 0) rsel.WatchRead(links[i].sock); + if (links[i].size_write == 0) rsel.WatchWrite(links[i].sock); + } + printf("[%d] before select\n", rank); + rsel.Select(); + printf("[%d] after select\n", rank); + bool finished = true; + for (int i = 0; i < nlink; ++i) { + if (links[i].sock.BadSocket()) continue; + if (links[i].size_read == 0 && rsel.CheckRead(links[i].sock)) { + char ack; + links[i].ReadToArray(&ack, sizeof(ack)); + if (links[i].size_read != 0) { + utils::Assert(ack == kOOBResetAck, "expect ack message"); + } + } + if (links[i].size_write == 0 && rsel.CheckWrite(links[i].sock)) { + char ack = kOOBResetAck; + links[i].WriteFromArray(&ack, sizeof(ack)); + } + if (links[i].size_read == 0 || links[i].size_write == 0) finished = false; + } + if (finished) break; + } + printf("[%d] after the read write data success\n", rank); + for (int i = 0; i < nlink; ++i) { + if (links[i].sock.BadSocket()) return kSockError; + } return kSuccess; } // Run 
AllReduce, return if success @@ -376,10 +427,15 @@ class AllReduceManager : public IEngine { // while we have not passed the messages out while (true) { - if (selecter.Select() == -1) { - if (errno == EBADF || errno == EINTR) return kSockError; - utils::Socket::Error("select"); + // select helper + utils::SelectHelper selecter; + for (size_t i = 0; i < links.size(); ++i) { + selecter.WatchRead(links[i].sock); + selecter.WatchWrite(links[i].sock); + selecter.WatchException(links[i].sock); } + // select must return + selecter.Select(); // exception handling for (int i = 0; i < nlink; ++i) { // recive OOB message from some link @@ -437,9 +493,12 @@ class AllReduceManager : public IEngine { } } // read data from parent - if (selecter.CheckRead(links[parent_index].sock)) { + if (selecter.CheckRead(links[parent_index].sock) && total_size > size_down_in) { ssize_t len = links[parent_index].sock. Recv(sendrecvbuf + size_down_in, total_size - size_down_in); + if (len == 0) { + links[parent_index].sock.Close(); return kSockError; + } if (len != -1) { size_down_in += static_cast(len); utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error"); @@ -482,10 +541,8 @@ class AllReduceManager : public IEngine { char *buffer_head; // buffer size, in bytes size_t buffer_size; - // state used by TryResetLinks, whether a link is already cleaned from OOB mark - bool oob_clear; // constructor - LinkRecord(void) : oob_clear(false) {} + LinkRecord(void) {} // initialize buffer inline void InitBuffer(size_t type_nbytes, size_t count, size_t reduce_buffer_size) { size_t n = (type_nbytes * count + 7)/ 8; @@ -511,8 +568,13 @@ class AllReduceManager : public IEngine { size_t ngap = size_read - protect_start; utils::Assert(ngap <= buffer_size, "AllReduce: boundary check"); size_t offset = size_read % buffer_size; - size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); + size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); + if (nmax == 0) return true; 
ssize_t len = sock.Recv(buffer_head + offset, nmax); + // length equals 0, remote disconnected + if (len == 0) { + sock.Close(); return false; + } if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; size_read += static_cast(len); return true; @@ -525,8 +587,13 @@ class AllReduceManager : public IEngine { * \return true if it is an successful read, false if there is some error happens, check errno */ inline bool ReadToArray(void *recvbuf_, size_t max_size) { + if (max_size == size_read ) return true; char *p = static_cast(recvbuf_); ssize_t len = sock.Recv(p + size_read, max_size - size_read); + // length equals 0, remote disconnected + if (len == 0) { + sock.Close(); return false; + } if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; size_read += static_cast(len); return true; @@ -613,8 +680,6 @@ class AllReduceManager : public IEngine { int parent_index; // sockets of all links std::vector links; - // select helper - utils::SelectHelper selecter; //----- meta information----- // uri of current host, to be set by Init std::string host_uri; diff --git a/src/socket.h b/src/socket.h index 9cbf1bcea..b9754f87d 100644 --- a/src/socket.h +++ b/src/socket.h @@ -164,6 +164,26 @@ class Socket { } return -1; } + /*! \brief get last error code if any */ + inline int GetSockError(void) const { + int error = 0; + socklen_t len = sizeof(error); + if (getsockopt(sockfd, SOL_SOCKET, SO_ERROR, &error, &len) != 0) { + Error("GetSockError"); + } + return error; + } + /*! \brief check if anything bad happens */ + inline bool BadSocket(void) const { + if (IsClosed()) return true; + int err = GetSockError(); + if (err == EBADF || err == EINTR) return true; + return false; + } + /*! \brief check if socket is already closed */ + inline bool IsClosed(void) const { + return sockfd == INVALID_SOCKET; + } /*! 
\brief close the socket */ inline void Close(void) { if (sockfd != INVALID_SOCKET) { @@ -177,7 +197,6 @@ class Socket { Error("Socket::Close double close the socket or close without create"); } } - // report an socket error inline static void Error(const char *msg) { int errsv = errno; @@ -267,9 +286,8 @@ class TCPSocket : public Socket{ */ inline ssize_t Recv(void *buf_, size_t len, int flags = 0) { char *buf = reinterpret_cast(buf_); - if (len == 0) return 0; return recv(sockfd, buf, static_cast(len), flags); - } + } /*! * \brief peform block write that will attempt to send all data out * can still return smaller than request when error occurs @@ -319,14 +337,17 @@ class TCPSocket : public Socket{ struct SelectHelper { public: SelectHelper(void) { - this->Clear(); + FD_ZERO(&read_set); + FD_ZERO(&write_set); + FD_ZERO(&except_set); + maxfd = 0; } /*! * \brief add file descriptor to watch for read * \param fd file descriptor to be watched */ inline void WatchRead(SOCKET fd) { - read_fds.push_back(fd); + FD_SET(fd, &read_set); if (fd > maxfd) maxfd = fd; } /*! @@ -334,7 +355,7 @@ struct SelectHelper { * \param fd file descriptor to be watched */ inline void WatchWrite(SOCKET fd) { - write_fds.push_back(fd); + FD_SET(fd, &write_set); if (fd > maxfd) maxfd = fd; } /*! @@ -342,7 +363,7 @@ struct SelectHelper { * \param fd file descriptor to be watched */ inline void WatchException(SOCKET fd) { - except_fds.push_back(fd); + FD_SET(fd, &except_set); if (fd > maxfd) maxfd = fd; } /*! @@ -367,51 +388,49 @@ struct SelectHelper { return FD_ISSET(fd, &except_set) != 0; } /*! 
- * \brief clear all the monitored descriptors + * \brief wait for exception event on a single descriptor + * \param fd the file descriptor to wait the event for + * \param timeout the timeout counter, can be 0, which means wait until the event happen + * \return 1 if success, 0 if timeout, and -1 if error occurs */ - inline void Clear(void) { - read_fds.clear(); - write_fds.clear(); - except_fds.clear(); - maxfd = 0; - } + inline static int WaitExcept(SOCKET fd, long timeout = 0) { + fd_set wait_set; + FD_ZERO(&wait_set); + FD_SET(fd, &wait_set); + return Select_(static_cast(fd + 1), NULL, NULL, &wait_set, timeout); + } /*! * \brief peform select on the set defined + * \param select_read whether to watch for read event + * \param select_write whether to watch for write event + * \param select_except whether to watch for exception event * \param timeout specify timeout in micro-seconds(ms) if equals 0, means select will always block * \return number of active descriptors selected, * return -1 if error occurs */ inline int Select(long timeout = 0) { - FD_ZERO(&read_set); - FD_ZERO(&write_set); - FD_ZERO(&except_set); - for (size_t i = 0; i < read_fds.size(); ++i) { - FD_SET(read_fds[i], &read_set); - } - for (size_t i = 0; i < write_fds.size(); ++i) { - FD_SET(write_fds[i], &write_set); - } - for (size_t i = 0; i < except_fds.size(); ++i) { - FD_SET(except_fds[i], &except_set); - } - int ret; - if (timeout == 0) { - ret = select(static_cast(maxfd + 1), &read_set, - &write_set, &except_set, NULL); - } else { - timeval tm; - tm.tv_usec = (timeout % 1000) * 1000; - tm.tv_sec = timeout / 1000; - ret = select(static_cast(maxfd + 1), &read_set, - &write_set, &except_set, &tm); + int ret = Select_(static_cast(maxfd + 1), + &read_set, &write_set, &except_set, timeout); + if (ret == -1) { + Socket::Error("Select"); } return ret; } private: + inline static int Select_(int maxfd, fd_set *rfds, fd_set *wfds, fd_set *efds, long timeout) { + if (timeout == 0) { + return 
select(maxfd, rfds, wfds, efds, NULL); + } else { + timeval tm; + tm.tv_usec = (timeout % 1000) * 1000; + tm.tv_sec = timeout / 1000; + return select(maxfd, rfds, wfds, efds, &tm); + } + } + SOCKET maxfd; fd_set read_set, write_set, except_set; - std::vector read_fds, write_fds, except_fds; }; } #endif From 98756c068a12d6855cd1c303750e9ed10ca9f62b Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 28 Nov 2014 21:58:15 -0800 Subject: [PATCH 011/531] livelock in oob send recv --- src/socket.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/socket.h b/src/socket.h index b9754f87d..a5238a6c0 100644 --- a/src/socket.h +++ b/src/socket.h @@ -273,7 +273,6 @@ class TCPSocket : public Socket{ */ inline ssize_t Send(const void *buf_, size_t len, int flag = 0) { const char *buf = reinterpret_cast(buf_); - if (len == 0) return 0; return send(sockfd, buf, static_cast(len), flag); } /*! From 42505f473dc91bc870d6ceaa6fb0128f0047a0e2 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 29 Nov 2014 15:14:43 -0800 Subject: [PATCH 012/531] finish reset link log --- src/engine_robust.cpp | 162 ++++++++++++++++++---------------------- test/test_allreduce.cpp | 4 +- 2 files changed, 73 insertions(+), 93 deletions(-) diff --git a/src/engine_robust.cpp b/src/engine_robust.cpp index 9ed2a31fb..3382d189d 100644 --- a/src/engine_robust.cpp +++ b/src/engine_robust.cpp @@ -1,6 +1,6 @@ /*! * \file engine_robust.cpp - * \brief Robust implementation of AllReduce + * \brief Robust implementation of AllReduce * using TCP non-block socket and tree-shape reduction. 
* * This implementation considers the failure of nodes @@ -35,8 +35,10 @@ class AllReduceManager : public IEngine { // constant one byte out of band message to indicate error happening // and mark for channel cleanup const static char kOOBReset = 95; + // and mark for channel cleanup, after OOB signal + const static char kResetMark = 97; // and mark for channel cleanup - const static char kOOBResetAck = 97; + const static char kResetAck = 97; AllReduceManager(void) { master_uri = "NULL"; @@ -173,7 +175,6 @@ class AllReduceManager : public IEngine { size_t count, ReduceFunction reducer) { while (true) { - if (rank == rand() % 3) TryResetLinks(); ReturnType ret = TryAllReduce(sendrecvbuf_, type_nbytes, count, reducer); if (ret == kSuccess) return; if (ret == kSockError) { @@ -280,119 +281,95 @@ class AllReduceManager : public IEngine { for (int i = 0; i < nlink; ++i) { links[i].InitBuffer(sizeof(int), 1 << 10, reduce_buffer_size); links[i].ResetSize(); + links[i].except = false; } - printf("[%d] start to reset link\n", rank); + // read and discard data from all channels until pass mark while (true) { - printf("[%d] loop\n", rank); - bool finished = true; for (int i = 0; i < nlink; ++i) { if (links[i].sock.BadSocket()) continue; if (links[i].size_write == 0) { char sig = kOOBReset; ssize_t len = links[i].sock.Send(&sig, sizeof(sig), MSG_OOB); // error will be filtered in next loop - if (len != -1) { - links[i].size_write += len; - printf("[%d] send OOB success\n", rank); + if (len == sizeof(sig)) links[i].size_write = 1; + } + if (links[i].size_write == 1) { + char sig = kResetMark; + ssize_t len = links[i].sock.Send(&sig, sizeof(sig)); + if (len == sizeof(sig)) links[i].size_write = 2; + } + if (links[i].size_read == 0) { + int atmark = links[i].sock.AtMark(); + if (atmark < 0) { + utils::Assert(links[i].sock.BadSocket(), "must already gone bad"); + } else if (atmark > 0) { + links[i].size_read = 1; + } else { + // no at mark, read and discard data + ssize_t len = 
links[i].sock.Recv(links[i].buffer_head, links[i].buffer_size); + // zero length, remote closed the connection, close socket + if (len == 0) links[i].sock.Close(); } } - // need to send OOB to every other link - if (links[i].size_write == 0) finished = false; } - if (finished) break; - } - printf("[%d] finish send all OOB\n", rank); - // wait for incoming except from all links - for (int i = 0; i < nlink; ++ i) { - if (links[i].sock.BadSocket()) continue; - printf("[%d] wait except\n", rank); - if (utils::SelectHelper::WaitExcept(links[i].sock) == -1) { - utils::Socket::Error("select"); - } - printf("[%d] finish wait except\n", rank); - } - printf("[%d] start to discard link\n", rank); - // read and discard data from all channels until pass mark - while (true) { utils::SelectHelper rsel; bool finished = true; for (int i = 0; i < nlink; ++i) { - if (links[i].sock.BadSocket()) continue; - if (links[i].size_read == 0) { - int atmark = links[i].sock.AtMark(); - if (atmark < 0) return kSockError; - if (atmark == 1) { - char oob_msg; - ssize_t len = links[i].sock.Recv(&oob_msg, sizeof(oob_msg), MSG_OOB); - if (len == -1 && errno != EAGAIN && errno != EWOULDBLOCK) { - finished = false; continue; - } - utils::Assert(oob_msg == kOOBReset, "wrong oob msg"); - links[i].size_read = 1; - } else { - finished = false; - rsel.WatchRead(links[i].sock); - } + if (links[i].size_write != 2 && !links[i].sock.BadSocket()) { + rsel.WatchWrite(links[i].sock); finished = false; + } + if (links[i].size_read == 0 && !links[i].sock.BadSocket()) { + rsel.WatchRead(links[i].sock); finished = false; } } if (finished) break; // wait to read from the channels to discard data rsel.Select(); - printf("[%d] select finish read from\n", rank); - for (int i = 0; i < nlink; ++i) { - if (links[i].sock.BadSocket()) continue; - if (rsel.CheckRead(links[i].sock)) { - ssize_t len = links[i].sock.Recv(links[i].buffer_head, links[i].buffer_size); - // zero length, remote closed the connection, close socket - if 
(len == 0) { - links[i].sock.Close(); - } else if (len == -1) { - // when error happens here, oob_clear will remember - if (errno == EAGAIN && errno == EWOULDBLOCK) printf("would block\n"); - } else { - printf("[%d] discard %ld bytes\n", rank, len); - } - } - } } - printf("[%d] discard all success\n", rank); - // start synchronization step + // start synchronization, use blocking I/O to avoid select for (int i = 0; i < nlink; ++i) { - links[i].ResetSize(); - } - while (true) { - // selecter for TryResetLinks - utils::SelectHelper rsel; - for (int i = 0; i < nlink; ++i) { - if (links[i].sock.BadSocket()) continue; - if (links[i].size_read == 0) rsel.WatchRead(links[i].sock); - if (links[i].size_write == 0) rsel.WatchWrite(links[i].sock); - } - printf("[%d] before select\n", rank); - rsel.Select(); - printf("[%d] after select\n", rank); - bool finished = true; - for (int i = 0; i < nlink; ++i) { - if (links[i].sock.BadSocket()) continue; - if (links[i].size_read == 0 && rsel.CheckRead(links[i].sock)) { - char ack; - links[i].ReadToArray(&ack, sizeof(ack)); - if (links[i].size_read != 0) { - utils::Assert(ack == kOOBResetAck, "expect ack message"); + if (!links[i].sock.BadSocket()) { + char oob_mark; + links[i].sock.SetNonBlock(false); + ssize_t len = links[i].sock.Recv(&oob_mark, sizeof(oob_mark), MSG_WAITALL); + if (len == 0) { + links[i].sock.Close(); continue; + } else if (len > 0) { + utils::Assert(oob_mark == kResetMark, "wrong oob msg"); + utils::Assert(!links[i].sock.AtMark(), "should already read past mark"); + } else { + utils::Assert(errno != EAGAIN|| errno != EWOULDBLOCK, "BUG"); + } + // send out ack + char ack = kResetAck; + while (true) { + len = links[i].sock.Send(&ack, sizeof(ack)); + if (len == sizeof(ack)) break; + if (len == -1) { + if (errno != EAGAIN && errno != EWOULDBLOCK) break; } } - if (links[i].size_write == 0 && rsel.CheckWrite(links[i].sock)) { - char ack = kOOBResetAck; - links[i].WriteFromArray(&ack, sizeof(ack)); - } - if 
(links[i].size_read == 0 || links[i].size_write == 0) finished = false; } - if (finished) break; } - printf("[%d] after the read write data success\n", rank); + // wait all ack + for (int i = 0; i < nlink; ++i) { + if (!links[i].sock.BadSocket()) { + char ack; + ssize_t len = links[i].sock.Recv(&ack, sizeof(ack), MSG_WAITALL); + if (len == 0) { + links[i].sock.Close(); continue; + } else if (len > 0) { + utils::Assert(ack == kResetAck, "wrong Ack MSG"); + } else { + utils::Assert(errno != EAGAIN|| errno != EWOULDBLOCK, "BUG"); + } + // set back to nonblock mode + links[i].sock.SetNonBlock(true); + } + } for (int i = 0; i < nlink; ++i) { if (links[i].sock.BadSocket()) return kSockError; - } + } return kSuccess; } // Run AllReduce, return if success @@ -540,9 +517,12 @@ class AllReduceManager : public IEngine { // pointer to buffer head char *buffer_head; // buffer size, in bytes - size_t buffer_size; + size_t buffer_size; + // exception + bool except; // constructor LinkRecord(void) {} + // initialize buffer inline void InitBuffer(size_t type_nbytes, size_t count, size_t reduce_buffer_size) { size_t n = (type_nbytes * count + 7)/ 8; @@ -587,7 +567,7 @@ class AllReduceManager : public IEngine { * \return true if it is an successful read, false if there is some error happens, check errno */ inline bool ReadToArray(void *recvbuf_, size_t max_size) { - if (max_size == size_read ) return true; + if (max_size == size_read) return true; char *p = static_cast(recvbuf_); ssize_t len = sock.Recv(p + size_read, max_size - size_read); // length equals 0, remote disconnected diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index 9afdc6d03..40c85ea0b 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -76,9 +76,9 @@ int main(int argc, char *argv[]) { printf("[%d] start at %s\n", rank, name.c_str()); TestMax(mock, n); - printf("[%d] TestMax pass\n", rank); + printf("[%d] !!!TestMax pass\n", rank); TestSum(mock, n); - printf("[%d] TestSum pass\n", 
rank); + printf("[%d] !!!TestSum pass\n", rank); sync::Finalize(); printf("[%d] all check pass\n", rank); return 0; From 5b0bb531847d44f9dd97b1c2fdfdac87530843b5 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 29 Nov 2014 20:15:27 -0800 Subject: [PATCH 013/531] refactor code style, reset link still need thoughts --- src/engine.cc | 39 +++ src/engine.h | 6 +- src/engine_base.cc | 349 ++++++++++++++++++++ src/engine_base.h | 244 ++++++++++++++ src/engine_robust.cc | 181 +++++++++++ src/engine_robust.cpp | 702 ---------------------------------------- src/engine_robust.h | 206 ++++++++++++ src/engine_tcp.cpp | 485 --------------------------- src/utils.h | 12 + test/Makefile | 14 +- test/test_allreduce.cpp | 6 +- 11 files changed, 1045 insertions(+), 1199 deletions(-) create mode 100644 src/engine.cc create mode 100644 src/engine_base.cc create mode 100644 src/engine_base.h create mode 100644 src/engine_robust.cc delete mode 100644 src/engine_robust.cpp create mode 100644 src/engine_robust.h delete mode 100644 src/engine_tcp.cpp diff --git a/src/engine.cc b/src/engine.cc new file mode 100644 index 000000000..17aacd5cf --- /dev/null +++ b/src/engine.cc @@ -0,0 +1,39 @@ +/*! + * \file engine.cc + * \brief this file governs which implementation of engine we are actually using + * provides an singleton of engine interface + * + * \author Tianqi, Nacho, Tianyi + */ +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE +#define NOMINMAX + +#include "./engine.h" +#include "./engine_base.h" +#include "./engine_robust.h" + +namespace engine { +// singleton sync manager +AllReduceRobust manager; + +/*! \brief intiialize the synchronization module */ +void Init(int argc, char *argv[]) { + for (int i = 1; i < argc; ++i) { + char name[256], val[256]; + if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { + manager.SetParam(name, val); + } + } + manager.Init(); +} + +/*! \brief finalize syncrhonization module */ +void Finalize(void) { + manager.Shutdown(); +} +/*! 
\brief singleton method to get engine */ +IEngine *GetEngine(void) { + return &manager; +} +} // namespace engine diff --git a/src/engine.h b/src/engine.h index 510f3aabd..42e19c139 100644 --- a/src/engine.h +++ b/src/engine.h @@ -1,10 +1,10 @@ -#ifndef ALLREDUCE_ENGINE_H -#define ALLREDUCE_ENGINE_H /*! * \file engine.h * \brief This file defines the interface of allreduce library * \author Tianqi Chen, Nacho, Tianyi */ +#ifndef ALLREDUCE_ENGINE_H +#define ALLREDUCE_ENGINE_H #include "./io.h" @@ -49,7 +49,7 @@ class IEngine { * \param sendrecvbuf_ buffer for both sending and recving data * \param size the size of the data to be broadcasted * \param root the root worker id to broadcast the data - */ + */ virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) = 0; /*! * \brief load latest check point diff --git a/src/engine_base.cc b/src/engine_base.cc new file mode 100644 index 000000000..e2eca014f --- /dev/null +++ b/src/engine_base.cc @@ -0,0 +1,349 @@ +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE +#define NOMINMAX +#include +#include "./engine_base.h" + +namespace engine { +// constructor +AllReduceBase::AllReduceBase(void) { + master_uri = "NULL"; + master_port = 9000; + host_uri = ""; + slave_port = 9010; + nport_trial = 1000; + rank = 0; + world_size = 1; + this->SetParam("reduce_buffer", "256MB"); +} + +// initialization function +void AllReduceBase::Init(void) { + utils::Socket::Startup(); + // single node mode + if (master_uri == "NULL") return; + utils::Assert(links.size() == 0, "can only call Init once"); + int magic = kMagic; + int nchild = 0, nparent = 0; + this->host_uri = utils::SockAddr::GetHostName(); + // get information from master + utils::TCPSocket master; + master.Create(); + if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) { + utils::Socket::Error("Connect"); + } + utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 1"); + 
utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 2"); + utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); + utils::Assert(master.RecvAll(&rank, sizeof(rank)) == sizeof(rank), "sync::Init failure 3"); + utils::Assert(master.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), "sync::Init failure 4"); + utils::Assert(master.RecvAll(&nparent, sizeof(nparent)) == sizeof(nparent), "sync::Init failure 5"); + utils::Assert(master.RecvAll(&nchild, sizeof(nchild)) == sizeof(nchild), "sync::Init failure 6"); + utils::Assert(nchild >= 0, "in correct number of childs"); + utils::Assert(nparent == 1 || nparent == 0, "in correct number of parent"); + + // create listen + utils::TCPSocket sock_listen; + sock_listen.Create(); + int port = sock_listen.TryBindHost(slave_port, slave_port + nport_trial); + utils::Check(port != -1, "sync::Init fail to bind the ports specified"); + sock_listen.Listen(); + + if (nparent != 0) { + parent_index = 0; + links.push_back(LinkRecord()); + int len, hport; + std::string hname; + utils::Assert(master.RecvAll(&len, sizeof(len)) == sizeof(len), "sync::Init failure 9"); + hname.resize(len); + utils::Assert(len != 0, "string must not be empty"); + utils::Assert(master.RecvAll(&hname[0], len) == static_cast(len), "sync::Init failure 10"); + utils::Assert(master.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "sync::Init failure 11"); + links[0].sock.Create(); + links[0].sock.Connect(utils::SockAddr(hname.c_str(), hport)); + utils::Assert(links[0].sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 12"); + utils::Assert(links[0].sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 13"); + utils::Check(magic == kMagic, "sync::Init failure, parent magic number mismatch"); + parent_index = 0; + } else { + parent_index = -1; + } + // send back socket listening port to master + utils::Assert(master.SendAll(&port, sizeof(port)) == 
sizeof(port), "sync::Init failure 14"); + // close connection to master + master.Close(); + // accept links from childs + for (int i = 0; i < nchild; ++i) { + LinkRecord r; + while (true) { + r.sock = sock_listen.Accept(); + if (r.sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic) && magic == kMagic) { + utils::Assert(r.sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 15"); + break; + } else { + // not a valid child + r.sock.Close(); + } + } + links.push_back(r); + } + // close listening sockets + sock_listen.Close(); + // setup selecter + for (size_t i = 0; i < links.size(); ++i) { + // set the socket to non-blocking mode + links[i].sock.SetNonBlock(true); + } + // done +} + +void AllReduceBase::Shutdown(void) { + for (size_t i = 0; i < links.size(); ++i) { + links[i].sock.Close(); + } + links.clear(); + utils::TCPSocket::Finalize(); +} +// set the parameters for AllReduce +void AllReduceBase::SetParam(const char *name, const char *val) { + if (!strcmp(name, "master_uri")) master_uri = val; + if (!strcmp(name, "master_port")) master_port = atoi(val); + if (!strcmp(name, "reduce_buffer")) { + char unit; + unsigned long amount; + if (sscanf(val, "%lu%c", &amount, &unit) == 2) { + switch (unit) { + case 'B': reduce_buffer_size = (amount + 7)/ 8; break; + case 'K': reduce_buffer_size = amount << 7UL; break; + case 'M': reduce_buffer_size = amount << 17UL; break; + case 'G': reduce_buffer_size = amount << 27UL; break; + default: utils::Error("invalid format for reduce buffer"); + } + } else { + utils::Error("invalid format for reduce_buffer, shhould be {integer}{unit}, unit can be {B, KB, MB, GB}"); + } + } +} + +/*! + * \brief perform in-place allreduce, on sendrecvbuf, this function can fail, and will return the cause of failure + * + * NOTE on AllReduce: + * The kSuccess TryAllReduce does NOT mean every node have successfully finishes TryAllReduce. + * It only means the current node get the correct result of AllReduce. 
+ * However, it means every node finishes LAST call(instead of this one) of AllReduce/Bcast + * + * \param sendrecvbuf_ buffer for both sending and recving data + * \param type_nbytes the unit number of bytes the type have + * \param count number of elements to be reduced + * \param reducer reduce function + * \return this function can return + * - kSuccess: allreduce is success, + * - kSockError: a neighbor node go down, the connection is dropped + * - kGetExcept: another node which is not my neighbor go down, get Out-of-Band exception notification from my neighbor + */ +AllReduceBase::ReturnType +AllReduceBase::TryAllReduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer) { + if (links.size() == 0) return kSuccess; + // total size of message + const size_t total_size = type_nbytes * count; + // number of links + const int nlink = static_cast(links.size()); + // send recv buffer + char *sendrecvbuf = reinterpret_cast(sendrecvbuf_); + // size of space that we already performs reduce in up pass + size_t size_up_reduce = 0; + // size of space that we have already passed to parent + size_t size_up_out = 0; + // size of message we received, and send in the down pass + size_t size_down_in = 0; + // initialize the link ring-buffer and pointer + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + links[i].InitBuffer(type_nbytes, count, reduce_buffer_size); + } + links[i].ResetSize(); + } + // if no childs, no need to reduce + if (nlink == static_cast(parent_index != -1)) { + size_up_reduce = total_size; + } + + // while we have not passed the messages out + while (true) { + // select helper + utils::SelectHelper selecter; + for (size_t i = 0; i < links.size(); ++i) { + selecter.WatchRead(links[i].sock); + selecter.WatchWrite(links[i].sock); + selecter.WatchException(links[i].sock); + } + // select must return + selecter.Select(); + // exception handling + for (int i = 0; i < nlink; ++i) { + // recive OOB message from some link 
+ if (selecter.CheckExcept(links[i].sock)) return kGetExcept; + } + // read data from childs + for (int i = 0; i < nlink; ++i) { + if (i != parent_index && selecter.CheckRead(links[i].sock)) { + if (!links[i].ReadToRingBuffer(size_up_out)) return kSockError; + } + } + // this node have childs, peform reduce + if (nlink > static_cast(parent_index != -1)) { + size_t buffer_size = 0; + // do upstream reduce + size_t max_reduce = total_size; + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + max_reduce= std::min(max_reduce, links[i].size_read); + utils::Assert(buffer_size == 0 || buffer_size == links[i].buffer_size, + "buffer size inconsistent"); + buffer_size = links[i].buffer_size; + } + } + utils::Assert(buffer_size != 0, "must assign buffer_size"); + // round to type_n4bytes + max_reduce = (max_reduce / type_nbytes * type_nbytes); + // peform reduce, can be at most two rounds + while (size_up_reduce < max_reduce) { + // start position + size_t start = size_up_reduce % buffer_size; + // peform read till end of buffer + size_t nread = std::min(buffer_size - start, max_reduce - size_up_reduce); + utils::Assert(nread % type_nbytes == 0, "AllReduce: size check"); + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + reducer(links[i].buffer_head + start, + sendrecvbuf + size_up_reduce, + static_cast(nread / type_nbytes), + MPI::Datatype(type_nbytes)); + } + } + size_up_reduce += nread; + } + } + if (parent_index != -1) { + // pass message up to parent, can pass data that are already been reduced + if (selecter.CheckWrite(links[parent_index].sock)) { + ssize_t len = links[parent_index].sock. + Send(sendrecvbuf + size_up_out, size_up_reduce - size_up_out); + if (len != -1) { + size_up_out += static_cast(len); + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + } + } + // read data from parent + if (selecter.CheckRead(links[parent_index].sock) && total_size > size_down_in) { + ssize_t len = links[parent_index].sock. 
+ Recv(sendrecvbuf + size_down_in, total_size - size_down_in); + if (len == 0) { + links[parent_index].sock.Close(); return kSockError; + } + if (len != -1) { + size_down_in += static_cast(len); + utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error"); + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + } + } + } else { + // this is root, can use reduce as most recent point + size_down_in = size_up_out = size_up_reduce; + } + // check if we finished the job of message passing + size_t nfinished = size_down_in; + // can pass message down to childs + for (int i = 0; i < nlink; ++i) { + if (i != parent_index) { + if (selecter.CheckWrite(links[i].sock)) { + if (!links[i].WriteFromArray(sendrecvbuf, size_down_in)) return kSockError; + } + nfinished = std::min(links[i].size_write, nfinished); + } + } + // check boundary condition + if (nfinished >= total_size) break; + } + return kSuccess; +} +/*! + * \brief broadcast data from root to all nodes, this function can fail,and will return the cause of failure + * \param sendrecvbuf_ buffer for both sending and recving data + * \param total_size the size of the data to be broadcasted + * \param root the root worker id to broadcast the data + * \return this function can return three possible values, see detail in TryAllReduce + */ +AllReduceBase::ReturnType +AllReduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { + if (links.size() == 0) return kSuccess; + // number of links + const int nlink = static_cast(links.size()); + // size of space already read from data + size_t size_in = 0; + // input link, -2 means unknown yet, -1 means this is root + int in_link = -2; + + // initialize the link statistics + for (int i = 0; i < nlink; ++i) { + links[i].ResetSize(); + } + // root have all the data + if (this->rank == root) { + size_in = total_size; + in_link = -1; + } + // while we have not passed the messages out + while(true) { + // select helper + 
utils::SelectHelper selecter; + for (size_t i = 0; i < links.size(); ++i) { + selecter.WatchRead(links[i].sock); + selecter.WatchWrite(links[i].sock); + selecter.WatchException(links[i].sock); + } + // exception handling + for (int i = 0; i < nlink; ++i) { + // recive OOB message from some link + if (selecter.CheckExcept(links[i].sock)) return kGetExcept; + } + if (in_link == -2) { + // probe in-link + for (int i = 0; i < nlink; ++i) { + if (selecter.CheckRead(links[i].sock)) { + if (!links[i].ReadToArray(sendrecvbuf_, total_size)) return kSockError; + size_in = links[i].size_read; + if (size_in != 0) { + in_link = i; break; + } + } + } + } else { + // read from in link + if (in_link >= 0 && selecter.CheckRead(links[in_link].sock)) { + if(!links[in_link].ReadToArray(sendrecvbuf_, total_size)) return kSockError; + size_in = links[in_link].size_read; + } + } + size_t nfinished = total_size; + // send data to all out-link + for (int i = 0; i < nlink; ++i) { + if (i != in_link) { + if (selecter.CheckWrite(links[i].sock)) { + if (!links[i].WriteFromArray(sendrecvbuf_, size_in)) return kSockError; + } + nfinished = std::min(nfinished, links[i].size_write); + } + } + // check boundary condition + if (nfinished >= total_size) break; + } + return kSuccess; +} +} // namespace engine diff --git a/src/engine_base.h b/src/engine_base.h new file mode 100644 index 000000000..6c138529a --- /dev/null +++ b/src/engine_base.h @@ -0,0 +1,244 @@ +/*! + * \file engine_base.h + * \brief Basic implementation of AllReduce + * using TCP non-block socket and tree-shape reduction. 
+ * + * This implementation provides basic utility of AllReduce and Broadcast + * without considering node failure + * + * \author Tianqi, Nacho, Tianyi + */ +#ifndef ALLREDUCE_ENGINE_BASE_H +#define ALLREDUCE_ENGINE_BASE_H + +#include +#include +#include "./utils.h" +#include "./socket.h" +#include "./engine.h" + +namespace MPI { +// MPI data type to be compatible with existing MPI interface +class Datatype { + public: + size_t type_size; + Datatype(size_t type_size) : type_size(type_size) {} +}; +} + +namespace engine { +/*! \brief implementation of basic AllReduce engine */ +class AllReduceBase : public IEngine { + public: + // magic number to verify server + const static int kMagic = 0xff99; + // constant one byte out of band message to indicate error happening + AllReduceBase(void); + virtual ~AllReduceBase(void) {} + // shutdown the engine + void Shutdown(void); + // initialize the manager + void Init(void); + /*! \brief set parameters to the sync manager */ + virtual void SetParam(const char *name, const char *val); + /*! \brief get rank */ + virtual int GetRank(void) const { + return rank; + } + /*! \brief get rank */ + virtual int GetWorldSize(void) const { + return world_size; + } + /*! \brief get rank */ + virtual std::string GetHost(void) const { + return host_uri; + } + /*! + * \brief perform in-place allreduce, on sendrecvbuf + * this function is NOT thread-safe + * \param sendrecvbuf_ buffer for both sending and recving data + * \param type_nbytes the unit number of bytes the type have + * \param count number of elements to be reduced + * \param reducer reduce function + */ + virtual void AllReduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer) { + utils::Assert(TryAllReduce(sendrecvbuf_, type_nbytes, count, reducer) == kSuccess, + "AllReduce failed"); + } + /*! 
+ * \brief broadcast data from root to all nodes + * \param sendrecvbuf_ buffer for both sending and recving data + * \param size the size of the data to be broadcasted + * \param root the root worker id to broadcast the data + */ + virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { + utils::Assert(TryBroadcast(sendrecvbuf_, total_size, root) == kSuccess, + "AllReduce failed"); + } + /*! + * \brief load latest check point + * \param p_model pointer to the model + * \return true if there was stored checkpoint and load was successful + * false if there was no stored checkpoint, means we are start over gain + */ + virtual bool LoadCheckPoint(utils::ISerializable *p_model) { + return false; + } + /*! + * \brief checkpoint the model, meaning we finished a stage of execution + * \param p_model pointer to the model + */ + virtual void CheckPoint(const utils::ISerializable &model) { + } + + protected: + /*! \brief enumeration of possible returning results from Try functions */ + enum ReturnType { + kSuccess, + kSockError, + kGetExcept + }; + // link record to a neighbor + struct LinkRecord { + public: + // socket to get data from/to link + utils::TCPSocket sock; + // size of data readed from link + size_t size_read; + // size of data sent to the link + size_t size_write; + // pointer to buffer head + char *buffer_head; + // buffer size, in bytes + size_t buffer_size; + // constructor + LinkRecord(void) {} + // initialize buffer + inline void InitBuffer(size_t type_nbytes, size_t count, size_t reduce_buffer_size) { + size_t n = (type_nbytes * count + 7)/ 8; + buffer_.resize(std::min(reduce_buffer_size, n)); + // make sure align to type_nbytes + buffer_size = buffer_.size() * sizeof(uint64_t) / type_nbytes * type_nbytes; + utils::Assert(type_nbytes <= buffer_size, "too large type_nbytes=%lu, buffer_size=%lu", type_nbytes, buffer_size); + // set buffer head + buffer_head = reinterpret_cast(BeginPtr(buffer_)); + } + // reset the recv and sent size + 
inline void ResetSize(void) { + size_write = size_read = 0; + } + /*! + * \brief read data into ring-buffer, with care not to existing useful override data + * position after protect_start + * \param protect_start all data start from protect_start is still needed in buffer + * read shall not override this + * \return true if it is an successful read, false if there is some error happens, check errno + */ + inline bool ReadToRingBuffer(size_t protect_start) { + size_t ngap = size_read - protect_start; + utils::Assert(ngap <= buffer_size, "AllReduce: boundary check"); + size_t offset = size_read % buffer_size; + size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); + if (nmax == 0) return true; + ssize_t len = sock.Recv(buffer_head + offset, nmax); + // length equals 0, remote disconnected + if (len == 0) { + sock.Close(); return false; + } + if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; + size_read += static_cast(len); + return true; + } + /*! + * \brief read data into array, + * this function can not be used together with ReadToRingBuffer + * a link can either read into the ring buffer, or existing array + * \param max_size maximum size of array + * \return true if it is an successful read, false if there is some error happens, check errno + */ + inline bool ReadToArray(void *recvbuf_, size_t max_size) { + if (max_size == size_read) return true; + char *p = static_cast(recvbuf_); + ssize_t len = sock.Recv(p + size_read, max_size - size_read); + // length equals 0, remote disconnected + if (len == 0) { + sock.Close(); return false; + } + if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; + size_read += static_cast(len); + return true; + } + /*! 
+ * \brief write data in array to sock + * \param sendbuf_ head of array + * \param max_size maximum size of array + * \return true if it is an successful write, false if there is some error happens, check errno + */ + inline bool WriteFromArray(const void *sendbuf_, size_t max_size) { + const char *p = static_cast(sendbuf_); + ssize_t len = sock.Send(p + size_write, max_size - size_write); + if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; + size_write += static_cast(len); + return true; + } + + private: + // recv buffer to get data from child + // aligned with 64 bits, will be able to perform 64 bits operations freely + std::vector buffer_; + }; + /*! + * \brief perform in-place allreduce, on sendrecvbuf, this function can fail, and will return the cause of failure + * + * NOTE on AllReduce: + * The kSuccess TryAllReduce does NOT mean every node have successfully finishes TryAllReduce. + * It only means the current node get the correct result of AllReduce. + * However, it means every node finishes LAST call(instead of this one) of AllReduce/Bcast + * + * \param sendrecvbuf_ buffer for both sending and recving data + * \param type_nbytes the unit number of bytes the type have + * \param count number of elements to be reduced + * \param reducer reduce function + * \return this function can return + * - kSuccess: allreduce is success, + * - kSockError: a neighbor node go down, the connection is dropped + * - kGetExcept: another node which is not my neighbor go down, get Out-of-Band exception notification from my neighbor + */ + ReturnType TryAllReduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer); + /*! 
+ * \brief broadcast data from root to all nodes, this function can fail,and will return the cause of failure + * \param sendrecvbuf_ buffer for both sending and recving data + * \param size the size of the data to be broadcasted + * \param root the root worker id to broadcast the data + * \return this function can return three possible values, see detail in TryAllReduce + */ + ReturnType TryBroadcast(void *sendrecvbuf_, size_t size, int root); + //---- local data related to link ---- + // index of parent link, can be -1, meaning this is root of the tree + int parent_index; + // sockets of all links + std::vector links; + //----- meta information----- + // uri of current host, to be set by Init + std::string host_uri; + // uri of master + std::string master_uri; + // port of master address + int master_port; + // port of slave process + int slave_port, nport_trial; + // reduce buffer size + size_t reduce_buffer_size; + // current rank + int rank; + // world size + int world_size; +}; +} // namespace engine +#endif // ALLREDUCE_ENGINE_BASE_H diff --git a/src/engine_robust.cc b/src/engine_robust.cc new file mode 100644 index 000000000..b969bc9f6 --- /dev/null +++ b/src/engine_robust.cc @@ -0,0 +1,181 @@ +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE +#define NOMINMAX +#include "./utils.h" +#include "./engine_robust.h" + +namespace engine { +/*! 
+ * \brief perform in-place allreduce, on sendrecvbuf + * this function is NOT thread-safe + * \param sendrecvbuf_ buffer for both sending and recving data + * \param type_nbytes the unit number of bytes the type have + * \param count number of elements to be reduced + * \param reducer reduce function + */ +void AllReduceRobust::AllReduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer) { + utils::LogPrintf("[%d] call AllReduce", rank); + TryResetLinks(); + utils::LogPrintf("[%d] start work", rank); + while (true) { + ReturnType ret = TryAllReduce(sendrecvbuf_, type_nbytes, count, reducer); + if (ret == kSuccess) return; + if (ret == kSockError) { + utils::Error("error occur during all reduce\n"); + } + utils::LogPrintf("[%d] receive except signal, start reset link", rank); + TryResetLinks(); + //utils::Check(TryResetLinks() == kSuccess, "error when reset links"); + } + // TODO +} +/*! + * \brief broadcast data from root to all nodes + * \param sendrecvbuf_ buffer for both sending and recving data + * \param size the size of the data to be broadcasted + * \param root the root worker id to broadcast the data + */ +void AllReduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) { + utils::Assert(TryBroadcast(sendrecvbuf_, total_size, root) == kSuccess, + "AllReduce failed"); + // TODO +} +/*! + * \brief load latest check point + * \param p_model pointer to the model + * \return true if there was stored checkpoint and load was successful + * false if there was no stored checkpoint, means we are start over gain + */ +bool AllReduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { + // TODO + return false; +} +/*! + * \brief checkpoint the model, meaning we finished a stage of execution + * \param p_model pointer to the model + */ +void AllReduceRobust::CheckPoint(const utils::ISerializable &model) { + // TODO +} +/*! 
+ * \brief reset the all the existing links by sending Out-of-Band message marker + * after this function finishes, all the messages received and sent before in all live links are discarded, + * This allows us to get a fresh start after error has happened + * + * \return this function can return kSuccess or kSockError + * when kSockError is returned, it simply means there are bad sockets in the links, + * and some link recovery proceduer is needed + */ +AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { + utils::LogPrintf("[%d] TryResetLinks, start\n", rank); + // number of links + const int nlink = static_cast(links.size()); + for (int i = 0; i < nlink; ++i) { + links[i].InitBuffer(sizeof(int), 1 << 10, reduce_buffer_size); + links[i].ResetSize(); + } + // read and discard data from all channels until pass mark + while (true) { + for (int i = 0; i < nlink; ++i) { + if (links[i].sock.BadSocket()) continue; + if (links[i].size_write == 0) { + char sig = kOOBReset; + ssize_t len = links[i].sock.Send(&sig, sizeof(sig), MSG_OOB); + // error will be filtered in next loop + if (len == sizeof(sig)) links[i].size_write = 1; + } + if (links[i].size_write == 1) { + char sig = kResetMark; + ssize_t len = links[i].sock.Send(&sig, sizeof(sig)); + if (len == sizeof(sig)) links[i].size_write = 2; + } + if (links[i].size_read == 0) { + int atmark = links[i].sock.AtMark(); + if (atmark < 0) { + utils::Assert(links[i].sock.BadSocket(), "must already gone bad"); + } else if (atmark > 0) { + links[i].size_read = 1; + } else { + printf("buffer_size=%lu\n", links[i].buffer_size); + // no at mark, read and discard data + ssize_t len = links[i].sock.Recv(links[i].buffer_head, links[i].buffer_size); + // zero length, remote closed the connection, close socket + if (len == 0) links[i].sock.Close(); + } + } + } + utils::SelectHelper rsel; + bool finished = true; + for (int i = 0; i < nlink; ++i) { + if (links[i].size_write != 2 && !links[i].sock.BadSocket()) { + 
rsel.WatchWrite(links[i].sock); finished = false; + } + if (links[i].size_read == 0 && !links[i].sock.BadSocket()) { + rsel.WatchRead(links[i].sock); finished = false; + } + } + if (finished) break; + // wait to read from the channels to discard data + rsel.Select(); + } + utils::LogPrintf("[%d] Finish discard data\n", rank); + // start synchronization, use blocking I/O to avoid select + for (int i = 0; i < nlink; ++i) { + if (!links[i].sock.BadSocket()) { + char oob_mark; + links[i].sock.SetNonBlock(false); + ssize_t len = links[i].sock.Recv(&oob_mark, sizeof(oob_mark), MSG_WAITALL); + if (len == 0) { + links[i].sock.Close(); continue; + } else if (len > 0) { + utils::Assert(oob_mark == kResetMark, "wrong oob msg"); + utils::Assert(!links[i].sock.AtMark(), "should already read past mark"); + } else { + utils::Assert(errno != EAGAIN|| errno != EWOULDBLOCK, "BUG"); + } + // send out ack + char ack = kResetAck; + while (true) { + len = links[i].sock.Send(&ack, sizeof(ack)); + if (len == sizeof(ack)) break; + if (len == -1) { + if (errno != EAGAIN && errno != EWOULDBLOCK) break; + } + } + } + } + utils::LogPrintf("[%d] GGet all Acks\n", rank); + // wait all ack + for (int i = 0; i < nlink; ++i) { + if (!links[i].sock.BadSocket()) { + char ack; + ssize_t len = links[i].sock.Recv(&ack, sizeof(ack), MSG_WAITALL); + if (len == 0) { + links[i].sock.Close(); continue; + } else if (len > 0) { + utils::Assert(ack == kResetAck, "wrong Ack MSG"); + } else { + utils::Assert(errno != EAGAIN|| errno != EWOULDBLOCK, "BUG"); + } + // set back to nonblock mode + links[i].sock.SetNonBlock(true); + } + } + for (int i = 0; i < nlink; ++i) { + if (links[i].sock.BadSocket()) return kSockError; + } + utils::LogPrintf("[%d] TryResetLinks,!! 
return\n", rank); + return kSuccess; +} + +bool AllReduceRobust::RecoverExec(void *sendrecvbuf_, size_t size, int flag, int seqno) { + if (flag != 0) { + utils::Assert(seqno == ActionSummary::kMaxSeq, "must only set seqno for normal operations"); + } + ActionSummary act(flag, seqno); + return true; +} +} // namespace engine diff --git a/src/engine_robust.cpp b/src/engine_robust.cpp deleted file mode 100644 index 3382d189d..000000000 --- a/src/engine_robust.cpp +++ /dev/null @@ -1,702 +0,0 @@ -/*! - * \file engine_robust.cpp - * \brief Robust implementation of AllReduce - * using TCP non-block socket and tree-shape reduction. - * - * This implementation considers the failure of nodes - * - * \author Tianqi, Nacho, Tianyi - */ -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_DEPRECATE -#define NOMINMAX -#include -#include -#include -#include "./utils.h" -#include "./engine.h" -#include "./socket.h" - -namespace MPI { -// MPI data type to be compatible with existing MPI interface -class Datatype { - public: - size_t type_size; - Datatype(size_t type_size) : type_size(type_size) {} -}; -} - -namespace engine { -/*! 
\brief implementation of fault tolerant all reduce engine */ -class AllReduceManager : public IEngine { - public: - // magic number to verify server - const static int kMagic = 0xff99; - // constant one byte out of band message to indicate error happening - // and mark for channel cleanup - const static char kOOBReset = 95; - // and mark for channel cleanup, after OOB signal - const static char kResetMark = 97; - // and mark for channel cleanup - const static char kResetAck = 97; - - AllReduceManager(void) { - master_uri = "NULL"; - master_port = 9000; - host_uri = ""; - slave_port = 9010; - nport_trial = 1000; - rank = 0; - world_size = 1; - this->SetParam("reduce_buffer", "256MB"); - } - ~AllReduceManager(void) { - } - inline void Shutdown(void) { - for (size_t i = 0; i < links.size(); ++i) { - links[i].sock.Close(); - } - links.clear(); - utils::TCPSocket::Finalize(); - } - /*! \brief set parameters to the sync manager */ - inline void SetParam(const char *name, const char *val) { - if (!strcmp(name, "master_uri")) master_uri = val; - if (!strcmp(name, "master_port")) master_port = atoi(val); - if (!strcmp(name, "reduce_buffer")) { - char unit; - unsigned long amount; - if (sscanf(val, "%lu%c", &amount, &unit) == 2) { - switch (unit) { - case 'B': reduce_buffer_size = (amount + 7)/ 8; break; - case 'K': reduce_buffer_size = amount << 7UL; break; - case 'M': reduce_buffer_size = amount << 17UL; break; - case 'G': reduce_buffer_size = amount << 27UL; break; - default: utils::Error("invalid format for reduce buffer"); - } - } else { - utils::Error("invalid format for reduce_buffer, shhould be {integer}{unit}, unit can be {B, KB, MB, GB}"); - } - } - } - // initialize the manager - inline void Init(void) { - utils::Socket::Startup(); - // single node mode - if (master_uri == "NULL") return; - utils::Assert(links.size() == 0, "can only call Init once"); - int magic = kMagic; - int nchild = 0, nparent = 0; - this->host_uri = utils::SockAddr::GetHostName(); - // get 
information from master - utils::TCPSocket master; - master.Create(); - if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) { - utils::Socket::Error("Connect"); - } - utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 1"); - utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 2"); - utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); - utils::Assert(master.RecvAll(&rank, sizeof(rank)) == sizeof(rank), "sync::Init failure 3"); - utils::Assert(master.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), "sync::Init failure 4"); - utils::Assert(master.RecvAll(&nparent, sizeof(nparent)) == sizeof(nparent), "sync::Init failure 5"); - utils::Assert(master.RecvAll(&nchild, sizeof(nchild)) == sizeof(nchild), "sync::Init failure 6"); - utils::Assert(nchild >= 0, "in correct number of childs"); - utils::Assert(nparent == 1 || nparent == 0, "in correct number of parent"); - - // create listen - utils::TCPSocket sock_listen; - sock_listen.Create(); - int port = sock_listen.TryBindHost(slave_port, slave_port + nport_trial); - utils::Check(port != -1, "sync::Init fail to bind the ports specified"); - sock_listen.Listen(); - - if (nparent != 0) { - parent_index = 0; - links.push_back(LinkRecord()); - int len, hport; - std::string hname; - utils::Assert(master.RecvAll(&len, sizeof(len)) == sizeof(len), "sync::Init failure 9"); - hname.resize(len); - utils::Assert(len != 0, "string must not be empty"); - utils::Assert(master.RecvAll(&hname[0], len) == static_cast(len), "sync::Init failure 10"); - utils::Assert(master.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "sync::Init failure 11"); - links[0].sock.Create(); - links[0].sock.Connect(utils::SockAddr(hname.c_str(), hport)); - utils::Assert(links[0].sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 12"); - utils::Assert(links[0].sock.RecvAll(&magic, sizeof(magic)) == 
sizeof(magic), "sync::Init failure 13"); - utils::Check(magic == kMagic, "sync::Init failure, parent magic number mismatch"); - parent_index = 0; - } else { - parent_index = -1; - } - // send back socket listening port to master - utils::Assert(master.SendAll(&port, sizeof(port)) == sizeof(port), "sync::Init failure 14"); - // close connection to master - master.Close(); - // accept links from childs - for (int i = 0; i < nchild; ++i) { - LinkRecord r; - while (true) { - r.sock = sock_listen.Accept(); - if (r.sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic) && magic == kMagic) { - utils::Assert(r.sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 15"); - break; - } else { - // not a valid child - r.sock.Close(); - } - } - links.push_back(r); - } - // close listening sockets - sock_listen.Close(); - // setup selecter - for (size_t i = 0; i < links.size(); ++i) { - // set the socket to non-blocking mode - links[i].sock.SetNonBlock(true); - } - // done - } - /*! \brief get rank */ - virtual int GetRank(void) const { - return rank; - } - /*! \brief get rank */ - virtual int GetWorldSize(void) const { - return world_size; - } - /*! \brief get rank */ - virtual std::string GetHost(void) const { - return host_uri; - } - virtual void AllReduce(void *sendrecvbuf_, - size_t type_nbytes, - size_t count, - ReduceFunction reducer) { - while (true) { - ReturnType ret = TryAllReduce(sendrecvbuf_, type_nbytes, count, reducer); - if (ret == kSuccess) return; - if (ret == kSockError) { - utils::Error("error occur during all reduce\n"); - } - utils::Check(TryResetLinks() == kSuccess, "error when reset links"); - } - } - /*! 
- * \brief broadcast data from root to all nodes - * \param sendrecvbuf_ buffer for both sending and recving data - * \param size the size of the data to be broadcasted - * \param root the root worker id to broadcast the data - */ - virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { - if (links.size() == 0) return; - // number of links - const int nlink = static_cast(links.size()); - // size of space already read from data - size_t size_in = 0; - // input link, -2 means unknown yet, -1 means this is root - int in_link = -2; - - // initialize the link statistics - for (int i = 0; i < nlink; ++i) { - links[i].ResetSize(); - } - // root have all the data - if (this->rank == root) { - size_in = total_size; - in_link = -1; - } - - // while we have not passed the messages out - while(true) { - // select helper - utils::SelectHelper selecter; - for (size_t i = 0; i < links.size(); ++i) { - selecter.WatchRead(links[i].sock); - selecter.WatchWrite(links[i].sock); - selecter.WatchException(links[i].sock); - } - if (in_link == -2) { - // probe in-link - for (int i = 0; i < nlink; ++i) { - if (selecter.CheckRead(links[i].sock)) { - if (!links[i].ReadToArray(sendrecvbuf_, total_size)) { - utils::Socket::Error("Recv"); - } - size_in = links[i].size_read; - if (size_in != 0) { - in_link = i; break; - } - } - } - } else { - // read from in link - if (in_link >= 0 && selecter.CheckRead(links[in_link].sock)) { - if(!links[in_link].ReadToArray(sendrecvbuf_, total_size)) { - utils::Socket::Error("Recv"); - } - size_in = links[in_link].size_read; - } - } - size_t nfinished = total_size; - // send data to all out-link - for (int i = 0; i < nlink; ++i) { - if (i != in_link) { - if (selecter.CheckWrite(links[i].sock)) { - if (!links[i].WriteFromArray(sendrecvbuf_, size_in)) { - utils::Socket::Error("Send"); - } - } - nfinished = std::min(nfinished, links[i].size_write); - } - } - // check boundary condition - if (nfinished >= total_size) break; - } - } - virtual 
bool LoadCheckPoint(utils::ISerializable *p_model) { - return false; - } - virtual void CheckPoint(const utils::ISerializable &model) { - } - - protected: - // possible returning type from the Try Functions - enum ReturnType { - kSuccess, - kSockError, - kGetExcept - }; - // possible state of the server - enum ServerState { - kNormal, - kConnDrop, - kRecover - }; - // cleanup the links, by sending OOB message - inline ReturnType TryResetLinks(void) { - // number of links - const int nlink = static_cast(links.size()); - for (int i = 0; i < nlink; ++i) { - links[i].InitBuffer(sizeof(int), 1 << 10, reduce_buffer_size); - links[i].ResetSize(); - links[i].except = false; - } - // read and discard data from all channels until pass mark - while (true) { - for (int i = 0; i < nlink; ++i) { - if (links[i].sock.BadSocket()) continue; - if (links[i].size_write == 0) { - char sig = kOOBReset; - ssize_t len = links[i].sock.Send(&sig, sizeof(sig), MSG_OOB); - // error will be filtered in next loop - if (len == sizeof(sig)) links[i].size_write = 1; - } - if (links[i].size_write == 1) { - char sig = kResetMark; - ssize_t len = links[i].sock.Send(&sig, sizeof(sig)); - if (len == sizeof(sig)) links[i].size_write = 2; - } - if (links[i].size_read == 0) { - int atmark = links[i].sock.AtMark(); - if (atmark < 0) { - utils::Assert(links[i].sock.BadSocket(), "must already gone bad"); - } else if (atmark > 0) { - links[i].size_read = 1; - } else { - // no at mark, read and discard data - ssize_t len = links[i].sock.Recv(links[i].buffer_head, links[i].buffer_size); - // zero length, remote closed the connection, close socket - if (len == 0) links[i].sock.Close(); - } - } - } - utils::SelectHelper rsel; - bool finished = true; - for (int i = 0; i < nlink; ++i) { - if (links[i].size_write != 2 && !links[i].sock.BadSocket()) { - rsel.WatchWrite(links[i].sock); finished = false; - } - if (links[i].size_read == 0 && !links[i].sock.BadSocket()) { - rsel.WatchRead(links[i].sock); finished = 
false; - } - } - if (finished) break; - // wait to read from the channels to discard data - rsel.Select(); - } - // start synchronization, use blocking I/O to avoid select - for (int i = 0; i < nlink; ++i) { - if (!links[i].sock.BadSocket()) { - char oob_mark; - links[i].sock.SetNonBlock(false); - ssize_t len = links[i].sock.Recv(&oob_mark, sizeof(oob_mark), MSG_WAITALL); - if (len == 0) { - links[i].sock.Close(); continue; - } else if (len > 0) { - utils::Assert(oob_mark == kResetMark, "wrong oob msg"); - utils::Assert(!links[i].sock.AtMark(), "should already read past mark"); - } else { - utils::Assert(errno != EAGAIN|| errno != EWOULDBLOCK, "BUG"); - } - // send out ack - char ack = kResetAck; - while (true) { - len = links[i].sock.Send(&ack, sizeof(ack)); - if (len == sizeof(ack)) break; - if (len == -1) { - if (errno != EAGAIN && errno != EWOULDBLOCK) break; - } - } - } - } - // wait all ack - for (int i = 0; i < nlink; ++i) { - if (!links[i].sock.BadSocket()) { - char ack; - ssize_t len = links[i].sock.Recv(&ack, sizeof(ack), MSG_WAITALL); - if (len == 0) { - links[i].sock.Close(); continue; - } else if (len > 0) { - utils::Assert(ack == kResetAck, "wrong Ack MSG"); - } else { - utils::Assert(errno != EAGAIN|| errno != EWOULDBLOCK, "BUG"); - } - // set back to nonblock mode - links[i].sock.SetNonBlock(true); - } - } - for (int i = 0; i < nlink; ++i) { - if (links[i].sock.BadSocket()) return kSockError; - } - return kSuccess; - } - // Run AllReduce, return if success - inline ReturnType TryAllReduce(void *sendrecvbuf_, - size_t type_nbytes, - size_t count, - ReduceFunction reducer) { - if (links.size() == 0) return kSuccess; - // total size of message - const size_t total_size = type_nbytes * count; - // number of links - const int nlink = static_cast(links.size()); - // send recv buffer - char *sendrecvbuf = reinterpret_cast(sendrecvbuf_); - // size of space that we already performs reduce in up pass - size_t size_up_reduce = 0; - // size of space that we 
have already passed to parent - size_t size_up_out = 0; - // size of message we received, and send in the down pass - size_t size_down_in = 0; - // initialize the link ring-buffer and pointer - for (int i = 0; i < nlink; ++i) { - if (i != parent_index) { - links[i].InitBuffer(type_nbytes, count, reduce_buffer_size); - } - links[i].ResetSize(); - } - // if no childs, no need to reduce - if (nlink == static_cast(parent_index != -1)) { - size_up_reduce = total_size; - } - - // while we have not passed the messages out - while (true) { - // select helper - utils::SelectHelper selecter; - for (size_t i = 0; i < links.size(); ++i) { - selecter.WatchRead(links[i].sock); - selecter.WatchWrite(links[i].sock); - selecter.WatchException(links[i].sock); - } - // select must return - selecter.Select(); - // exception handling - for (int i = 0; i < nlink; ++i) { - // recive OOB message from some link - if (selecter.CheckExcept(links[i].sock)) return kGetExcept; - } - // read data from childs - for (int i = 0; i < nlink; ++i) { - if (i != parent_index && selecter.CheckRead(links[i].sock)) { - if (!links[i].ReadToRingBuffer(size_up_out)) return kSockError; - } - } - // this node have childs, peform reduce - if (nlink > static_cast(parent_index != -1)) { - size_t buffer_size = 0; - // do upstream reduce - size_t max_reduce = total_size; - for (int i = 0; i < nlink; ++i) { - if (i != parent_index) { - max_reduce= std::min(max_reduce, links[i].size_read); - utils::Assert(buffer_size == 0 || buffer_size == links[i].buffer_size, - "buffer size inconsistent"); - buffer_size = links[i].buffer_size; - } - } - utils::Assert(buffer_size != 0, "must assign buffer_size"); - // round to type_n4bytes - max_reduce = (max_reduce / type_nbytes * type_nbytes); - // peform reduce, can be at most two rounds - while (size_up_reduce < max_reduce) { - // start position - size_t start = size_up_reduce % buffer_size; - // peform read till end of buffer - size_t nread = std::min(buffer_size - start, 
max_reduce - size_up_reduce); - utils::Assert(nread % type_nbytes == 0, "AllReduce: size check"); - for (int i = 0; i < nlink; ++i) { - if (i != parent_index) { - reducer(links[i].buffer_head + start, - sendrecvbuf + size_up_reduce, - static_cast(nread / type_nbytes), - MPI::Datatype(type_nbytes)); - } - } - size_up_reduce += nread; - } - } - if (parent_index != -1) { - // pass message up to parent, can pass data that are already been reduced - if (selecter.CheckWrite(links[parent_index].sock)) { - ssize_t len = links[parent_index].sock. - Send(sendrecvbuf + size_up_out, size_up_reduce - size_up_out); - if (len != -1) { - size_up_out += static_cast(len); - } else { - if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; - } - } - // read data from parent - if (selecter.CheckRead(links[parent_index].sock) && total_size > size_down_in) { - ssize_t len = links[parent_index].sock. - Recv(sendrecvbuf + size_down_in, total_size - size_down_in); - if (len == 0) { - links[parent_index].sock.Close(); return kSockError; - } - if (len != -1) { - size_down_in += static_cast(len); - utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error"); - } else { - if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; - } - } - } else { - // this is root, can use reduce as most recent point - size_down_in = size_up_out = size_up_reduce; - } - // check if we finished the job of message passing - size_t nfinished = size_down_in; - // can pass message down to childs - for (int i = 0; i < nlink; ++i) { - if (i != parent_index) { - if (selecter.CheckWrite(links[i].sock)) { - if (!links[i].WriteFromArray(sendrecvbuf, size_down_in)) return kSockError; - } - nfinished = std::min(links[i].size_write, nfinished); - } - } - // check boundary condition - if (nfinished >= total_size) break; - } - return kSuccess; - } - - private: - // link record to a neighbor - struct LinkRecord { - public: - // socket to get data from/to link - utils::TCPSocket sock; - // size of 
data readed from link - size_t size_read; - // size of data sent to the link - size_t size_write; - // pointer to buffer head - char *buffer_head; - // buffer size, in bytes - size_t buffer_size; - // exception - bool except; - // constructor - LinkRecord(void) {} - - // initialize buffer - inline void InitBuffer(size_t type_nbytes, size_t count, size_t reduce_buffer_size) { - size_t n = (type_nbytes * count + 7)/ 8; - buffer_.resize(std::min(reduce_buffer_size, n)); - // make sure align to type_nbytes - buffer_size = buffer_.size() * sizeof(uint64_t) / type_nbytes * type_nbytes; - utils::Assert(type_nbytes <= buffer_size, "too large type_nbytes=%lu, buffer_size=%lu", type_nbytes, buffer_size); - // set buffer head - buffer_head = reinterpret_cast(BeginPtr(buffer_)); - } - // reset the recv and sent size - inline void ResetSize(void) { - size_write = size_read = 0; - } - /*! - * \brief read data into ring-buffer, with care not to existing useful override data - * position after protect_start - * \param protect_start all data start from protect_start is still needed in buffer - * read shall not override this - * \return true if it is an successful read, false if there is some error happens, check errno - */ - inline bool ReadToRingBuffer(size_t protect_start) { - size_t ngap = size_read - protect_start; - utils::Assert(ngap <= buffer_size, "AllReduce: boundary check"); - size_t offset = size_read % buffer_size; - size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); - if (nmax == 0) return true; - ssize_t len = sock.Recv(buffer_head + offset, nmax); - // length equals 0, remote disconnected - if (len == 0) { - sock.Close(); return false; - } - if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; - size_read += static_cast(len); - return true; - } - /*! 
- * \brief read data into array, - * this function can not be used together with ReadToRingBuffer - * a link can either read into the ring buffer, or existing array - * \param max_size maximum size of array - * \return true if it is an successful read, false if there is some error happens, check errno - */ - inline bool ReadToArray(void *recvbuf_, size_t max_size) { - if (max_size == size_read) return true; - char *p = static_cast(recvbuf_); - ssize_t len = sock.Recv(p + size_read, max_size - size_read); - // length equals 0, remote disconnected - if (len == 0) { - sock.Close(); return false; - } - if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; - size_read += static_cast(len); - return true; - } - /*! - * \brief write data in array to sock - * \param sendbuf_ head of array - * \param max_size maximum size of array - * \return true if it is an successful write, false if there is some error happens, check errno - */ - inline bool WriteFromArray(const void *sendbuf_, size_t max_size) { - const char *p = static_cast(sendbuf_); - ssize_t len = sock.Send(p + size_write, max_size - size_write); - if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; - size_write += static_cast(len); - return true; - } - - private: - // recv buffer to get data from child - // aligned with 64 bits, will be able to perform 64 bits operations freely - std::vector buffer_; - }; - // data structure to remember result of Bcast and AllReduce calls - class ResultBuffer { - public: - // constructor - ResultBuffer(void) { - this->Clear(); - } - // clear the existing record - inline void Clear(void) { - seqno_.clear(); size_.clear(); - rptr_.clear(); rptr_.push_back(0); - data_.clear(); - } - // allocate temporal space for - inline void *AllocTemp(size_t type_nbytes, size_t count) { - size_t size = type_nbytes * count; - size_t nhop = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); - utils::Assert(nhop != 0, "cannot allocate 0 size memory"); - data_.resize(rptr_.back() + 
nhop); - return BeginPtr(data_) + rptr_.back(); - } - // push the result in temp to the - inline void PushTemp(int seqid, size_t type_nbytes, size_t count) { - size_t size = type_nbytes * count; - size_t nhop = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); - if (seqno_.size() != 0) { - utils::Assert(seqno_.back() < seqid, "PushTemp seqid inconsistent"); - } - seqno_.push_back(seqid); - rptr_.push_back(rptr_.back() + nhop); - size_.push_back(size); - utils::Assert(data_.size() == rptr_.back(), "PushTemp inconsistent"); - } - // return the stored result of seqid, if any - inline void* Query(int seqid, size_t *p_size) { - size_t idx = std::lower_bound(seqno_.begin(), seqno_.end(), seqid) - seqno_.begin(); - if (idx == seqno_.size() || seqno_[idx] != seqid) return NULL; - *p_size = size_[idx]; - return BeginPtr(data_) + rptr_[idx]; - } - private: - // sequence number of each - std::vector seqno_; - // pointer to the positions - std::vector rptr_; - // actual size of each buffer - std::vector size_; - // content of the buffer - std::vector data_; - }; - //---- recovery data structure ---- - // call sequence counter, records how many calls we made so far - // from last call to CheckPoint, LoadCheckPoint - int seq_counter; - // result buffer - ResultBuffer resbuf; - // model that is saved from last CheckPoint - std::string check_point; - //---- local data related to link ---- - // index of parent link, can be -1, meaning this is root of the tree - int parent_index; - // sockets of all links - std::vector links; - //----- meta information----- - // uri of current host, to be set by Init - std::string host_uri; - // uri of master - std::string master_uri; - // port of master address - int master_port; - // port of slave process - int slave_port, nport_trial; - // reduce buffer size - size_t reduce_buffer_size; - // current rank - int rank; - // world size - int world_size; -}; - -// singleton sync manager -AllReduceManager manager; - -/*! 
\brief intiialize the synchronization module */ -void Init(int argc, char *argv[]) { - for (int i = 1; i < argc; ++i) { - char name[256], val[256]; - if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { - manager.SetParam(name, val); - } - } - manager.Init(); -} - -/*! \brief finalize syncrhonization module */ -void Finalize(void) { - manager.Shutdown(); -} -/*! \brief singleton method to get engine */ -IEngine *GetEngine(void) { - return &manager; -} -} // namespace engine diff --git a/src/engine_robust.h b/src/engine_robust.h new file mode 100644 index 000000000..f1949e11a --- /dev/null +++ b/src/engine_robust.h @@ -0,0 +1,206 @@ +/*! + * \file engine_robust.h + * \brief Robust implementation of AllReduce + * using TCP non-block socket and tree-shape reduction. + * + * This implementation considers the failure of nodes + * + * \author Tianqi, Nacho, Tianyi + */ +#ifndef ALLREDUCE_ENGINE_ROBUST_H +#define ALLREDUCE_ENGINE_ROBUST_H +#include "./engine.h" +#include "./engine_base.h" + +namespace engine { +/*! \brief implementation of fault tolerant all reduce engine */ +class AllReduceRobust : public AllReduceBase { + public: + virtual ~AllReduceRobust(void) {} + /*! + * \brief perform in-place allreduce, on sendrecvbuf + * this function is NOT thread-safe + * \param sendrecvbuf_ buffer for both sending and recving data + * \param type_nbytes the unit number of bytes the type have + * \param count number of elements to be reduced + * \param reducer reduce function + */ + virtual void AllReduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer); + /*! + * \brief broadcast data from root to all nodes + * \param sendrecvbuf_ buffer for both sending and recving data + * \param size the size of the data to be broadcasted + * \param root the root worker id to broadcast the data + */ + virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root); + /*! 
+ * \brief load latest check point + * \param p_model pointer to the model + * \return true if there was stored checkpoint and load was successful + * false if there was no stored checkpoint, means we are start over gain + */ + virtual bool LoadCheckPoint(utils::ISerializable *p_model); + /*! + * \brief checkpoint the model, meaning we finished a stage of execution + * \param p_model pointer to the model + */ + virtual void CheckPoint(const utils::ISerializable &model); + + private: + // constant one byte out of band message to indicate error happening + // and mark for channel cleanup + const static char kOOBReset = 95; + // and mark for channel cleanup, after OOB signal + const static char kResetMark = 97; + // and mark for channel cleanup + const static char kResetAck = 97; + /*! + * \brief summary of actions proposed in all nodes + * this data structure is used to make consensus decision + * about next action to take in the recovery mode + */ + struct ActionSummary { + // maximumly allowed sequence id + const static int kMaxSeq = 1 << 26; + //--------------------------------------------- + // The following are bit mask of flag used in + //---------------------------------------------- + // some node want to load check point + const static int kLoadCheck = 1; + // some node want to do check point + const static int kCheckPoint = 2; + // check point Ack, we use a two phase message in check point, + // this is the second phase of check pointing + const static int kCheckAck = 4; + // there are difference sequence number the nodes proposed + // this means we want to do recover execution of the lower sequence + // action instead of normal execution + const static int kDiffSeq = 8; + // constructor + ActionSummary(void) {} + // constructor of action + ActionSummary(int flag, int minseqno = kMaxSeq) { + seqcode = (minseqno << 4) | flag; + } + // minimum number of all operations + inline int min_seqno(void) const { + return seqcode >> 4; + } + // whether the operation 
set contains a check point + inline bool check_point(void) const { + return (seqcode & kCheckPoint) != 0; + } + // whether the operation set contains a check point + inline bool check_ack(void) const { + return (seqcode & kCheckAck) != 0; + } + // whether the operation set contains a check point + inline bool diff_seq(void) const { + return (seqcode & kDiffSeq) != 0; + } + // returns the operation flag of the result + inline int flag(void) const { + return seqcode & 15; + } + // reducer for AllReduce, used to get the result ActionSummary from all nodes + inline static void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) { + const ActionSummary *src = (const ActionSummary*)src_; + ActionSummary *dst = (ActionSummary*)dst_; + for (int i = 0; i < len; ++i) { + int src_seqno = src[i].min_seqno(); + int dst_seqno = dst[i].min_seqno(); + int flag = src[i].flag() | dst[i].flag(); + if (src_seqno == dst_seqno) { + dst[i] = ActionSummary(flag, src_seqno); + } else { + dst[i] = ActionSummary(flag | kDiffSeq, std::min(src_seqno, dst_seqno)); + } + } + } + + private: + // internel sequence code + int seqcode; + }; + /*! 
\brief data structure to remember result of Bcast and AllReduce calls */ + class ResultBuffer { + public: + // constructor + ResultBuffer(void) { + this->Clear(); + } + // clear the existing record + inline void Clear(void) { + seqno_.clear(); size_.clear(); + rptr_.clear(); rptr_.push_back(0); + data_.clear(); + } + // allocate temporal space for + inline void *AllocTemp(size_t type_nbytes, size_t count) { + size_t size = type_nbytes * count; + size_t nhop = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); + utils::Assert(nhop != 0, "cannot allocate 0 size memory"); + data_.resize(rptr_.back() + nhop); + return BeginPtr(data_) + rptr_.back(); + } + // push the result in temp to the + inline void PushTemp(int seqid, size_t type_nbytes, size_t count) { + size_t size = type_nbytes * count; + size_t nhop = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); + if (seqno_.size() != 0) { + utils::Assert(seqno_.back() < seqid, "PushTemp seqid inconsistent"); + } + seqno_.push_back(seqid); + rptr_.push_back(rptr_.back() + nhop); + size_.push_back(size); + utils::Assert(data_.size() == rptr_.back(), "PushTemp inconsistent"); + } + // return the stored result of seqid, if any + inline void* Query(int seqid, size_t *p_size) { + size_t idx = std::lower_bound(seqno_.begin(), seqno_.end(), seqid) - seqno_.begin(); + if (idx == seqno_.size() || seqno_[idx] != seqid) return NULL; + *p_size = size_[idx]; + return BeginPtr(data_) + rptr_[idx]; + } + private: + // sequence number of each + std::vector seqno_; + // pointer to the positions + std::vector rptr_; + // actual size of each buffer + std::vector size_; + // content of the buffer + std::vector data_; + }; + /*! 
+ * \brief reset the all the existing links by sending Out-of-Band message marker + * after this function finishes, all the messages received and sent before in all live links are discarded, + * This allows us to get a fresh start after error has happened + * + * \return this function can return kSuccess or kSockError + * when kSockError is returned, it simply means there are bad sockets in the links, + * and some link recovery proceduer is needed + */ + ReturnType TryResetLinks(void); + /*! + * \brief Run recovery execution of a action specified by flag and seqno, + * there can be two outcome of the function + * + * \param sendrecvbuf_ + * + * \return if this function returns true, this means + * behind and we will be able to recover data from existing node + */ + bool RecoverExec(void *sendrecvbuf_, size_t size, int flag, int seqno); + //---- recovery data structure ---- + // call sequence counter, records how many calls we made so far + // from last call to CheckPoint, LoadCheckPoint + int seq_counter; + // result buffer + ResultBuffer resbuf; +}; +} // namespace engine +#endif // ALLREDUCE_ENGINE_ROBUST_H diff --git a/src/engine_tcp.cpp b/src/engine_tcp.cpp deleted file mode 100644 index 4cbbe384f..000000000 --- a/src/engine_tcp.cpp +++ /dev/null @@ -1,485 +0,0 @@ -/*! - * \file engine_tcp.cpp - * \brief implementation of sync AllReduce using TCP sockets - * with use non-block socket and tree-shape reduction - * \author Tianqi Chen - */ -#define _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_DEPRECATE -#define NOMINMAX -#include -#include -#include -#include "./engine.h" -#include "./socket.h" - -namespace MPI { -class Datatype { - public: - size_t type_size; - Datatype(size_t type_size) : type_size(type_size) {} -}; -} -namespace engine { -/*! 
\brief implementation of sync goes to here */ -class SyncManager : public IEngine { - public: - const static int kMagic = 0xff99; - SyncManager(void) { - master_uri = "NULL"; - master_port = 9000; - host_uri = ""; - slave_port = 9010; - nport_trial = 1000; - rank = 0; - world_size = 1; - this->SetParam("reduce_buffer", "256MB"); - } - ~SyncManager(void) { - } - - inline void Shutdown(void) { - for (size_t i = 0; i < links.size(); ++i) { - links[i].sock.Close(); - } - links.clear(); - utils::TCPSocket::Finalize(); - } - /*! \brief set parameters to the sync manager */ - inline void SetParam(const char *name, const char *val) { - if (!strcmp(name, "master_uri")) master_uri = val; - if (!strcmp(name, "master_port")) master_port = atoi(val); - if (!strcmp(name, "reduce_buffer")) { - char unit; - unsigned long amount; - if (sscanf(val, "%lu%c", &amount, &unit) == 2) { - switch (unit) { - case 'B': reduce_buffer_size = (amount + 7)/ 8; break; - case 'K': reduce_buffer_size = amount << 7UL; break; - case 'M': reduce_buffer_size = amount << 17UL; break; - case 'G': reduce_buffer_size = amount << 27UL; break; - default: utils::Error("invalid format for reduce buffer"); - } - } else { - utils::Error("invalid format for reduce_buffer, shhould be {integer}{unit}, unit can be {B, KB, MB, GB}"); - } - } - } - // initialize the manager - inline void Init(void) { - utils::Socket::Startup(); - // single node mode - if (master_uri == "NULL") return; - utils::Assert(links.size() == 0, "can only call Init once"); - int magic = kMagic; - int nchild = 0, nparent = 0; - this->host_uri = utils::SockAddr::GetHostName(); - // get information from master - utils::TCPSocket master; - master.Create(); - if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) { - utils::Socket::Error("Connect"); - } - utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 1"); - utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init 
failure 2"); - utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); - utils::Assert(master.RecvAll(&rank, sizeof(rank)) == sizeof(rank), "sync::Init failure 3"); - utils::Assert(master.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), "sync::Init failure 4"); - utils::Assert(master.RecvAll(&nparent, sizeof(nparent)) == sizeof(nparent), "sync::Init failure 5"); - utils::Assert(master.RecvAll(&nchild, sizeof(nchild)) == sizeof(nchild), "sync::Init failure 6"); - utils::Assert(nchild >= 0, "in correct number of childs"); - utils::Assert(nparent == 1 || nparent == 0, "in correct number of parent"); - - // create listen - utils::TCPSocket sock_listen; - sock_listen.Create(); - int port = sock_listen.TryBindHost(slave_port, slave_port + nport_trial); - utils::Check(port != -1, "sync::Init fail to bind the ports specified"); - sock_listen.Listen(); - - if (nparent != 0) { - parent_index = 0; - links.push_back(LinkRecord()); - int len, hport; - std::string hname; - utils::Assert(master.RecvAll(&len, sizeof(len)) == sizeof(len), "sync::Init failure 9"); - hname.resize(len); - utils::Assert(len != 0, "string must not be empty"); - utils::Assert(master.RecvAll(&hname[0], len) == static_cast(len), "sync::Init failure 10"); - utils::Assert(master.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "sync::Init failure 11"); - links[0].sock.Create(); - links[0].sock.Connect(utils::SockAddr(hname.c_str(), hport)); - utils::Assert(links[0].sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 12"); - utils::Assert(links[0].sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 13"); - utils::Check(magic == kMagic, "sync::Init failure, parent magic number mismatch"); - parent_index = 0; - } else { - parent_index = -1; - } - // send back socket listening port to master - utils::Assert(master.SendAll(&port, sizeof(port)) == sizeof(port), "sync::Init failure 14"); - // close connection to master - master.Close(); 
- // accept links from childs - for (int i = 0; i < nchild; ++i) { - LinkRecord r; - while (true) { - r.sock = sock_listen.Accept(); - if (r.sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic) && magic == kMagic) { - utils::Assert(r.sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 15"); - break; - } else { - // not a valid child - r.sock.Close(); - } - } - links.push_back(r); - } - // close listening sockets - sock_listen.Close(); - // setup selecter - selecter.Clear(); - for (size_t i = 0; i < links.size(); ++i) { - // set the socket to non-blocking mode - links[i].sock.SetNonBlock(true); - selecter.WatchRead(links[i].sock); - selecter.WatchWrite(links[i].sock); - } - // done - } - /*! \brief get rank */ - virtual int GetRank(void) const { - return rank; - } - /*! \brief get rank */ - virtual int GetWorldSize(void) const { - return world_size; - } - /*! \brief get rank */ - virtual std::string GetHost(void) const { - return host_uri; - } - /*! - * \brief perform in-place allreduce, on sendrecvbuf - * this function is NOT thread-safe - * \param sendrecvbuf_ buffer for both sending and recving data - * \param type_n4bytes the unit number of bytes the type have - * \param count number of elements to be reduced - * \param reducer reduce function - */ - virtual void AllReduce(void *sendrecvbuf_, - size_t type_nbytes, - size_t count, - ReduceFunction reducer) { - if (links.size() == 0) return; - // total size of message - const size_t total_size = type_nbytes * count; - // number of links - const int nlink = static_cast(links.size()); - // send recv buffer - char *sendrecvbuf = reinterpret_cast(sendrecvbuf_); - // size of space that we already performs reduce in up pass - size_t size_up_reduce = 0; - // size of space that we have already passed to parent - size_t size_up_out = 0; - // size of message we received, and send in the down pass - size_t size_down_in = 0; - - // initialize the link ring-buffer and pointer - for (int i = 0; i < nlink; 
++i) { - if (i != parent_index) { - links[i].InitBuffer(type_nbytes, count, reduce_buffer_size); - } - links[i].ResetSize(); - } - // if no childs, no need to reduce - if (nlink == static_cast(parent_index != -1)) { - size_up_reduce = total_size; - } - - // while we have not passed the messages out - while(true) { - selecter.Select(); - // read data from childs - for (int i = 0; i < nlink; ++i) { - if (i != parent_index && selecter.CheckRead(links[i].sock)) { - if (!links[i].ReadToRingBuffer(size_up_out)) { - utils::Socket::Error("Recv"); - } - } - } - // this node have childs, peform reduce - if (nlink > static_cast(parent_index != -1)) { - size_t buffer_size = 0; - // do upstream reduce - size_t max_reduce = total_size; - for (int i = 0; i < nlink; ++i) { - if (i != parent_index) { - max_reduce= std::min(max_reduce, links[i].size_read); - utils::Assert(buffer_size == 0 || buffer_size == links[i].buffer_size, - "buffer size inconsistent"); - buffer_size = links[i].buffer_size; - } - } - utils::Assert(buffer_size != 0, "must assign buffer_size"); - // round to type_n4bytes - max_reduce = (max_reduce / type_nbytes * type_nbytes); - // peform reduce, can be at most two rounds - while (size_up_reduce < max_reduce) { - // start position - size_t start = size_up_reduce % buffer_size; - // peform read till end of buffer - size_t nread = std::min(buffer_size - start, max_reduce - size_up_reduce); - utils::Assert(nread % type_nbytes == 0, "AllReduce: size check"); - for (int i = 0; i < nlink; ++i) { - if (i != parent_index) { - reducer(links[i].buffer_head + start, - sendrecvbuf + size_up_reduce, - static_cast(nread / type_nbytes), - MPI::Datatype(type_nbytes)); - } - } - size_up_reduce += nread; - } - } - if (parent_index != -1) { - // pass message up to parent, can pass data that are already been reduced - if (selecter.CheckWrite(links[parent_index].sock)) { - ssize_t len = links[parent_index].sock. 
- Send(sendrecvbuf + size_up_out, size_up_reduce - size_up_out); - if (len != -1) { - size_up_out += static_cast(len); - } else { - if (errno != EAGAIN && errno != EWOULDBLOCK) utils::Socket::Error("Recv"); - } - } - // read data from parent - if (selecter.CheckRead(links[parent_index].sock)) { - ssize_t len = links[parent_index].sock. - Recv(sendrecvbuf + size_down_in, total_size - size_down_in); - if (len != -1) { - size_down_in += static_cast(len); - utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error"); - } else { - if (errno != EAGAIN && errno != EWOULDBLOCK) utils::Socket::Error("Recv"); - } - } - } else { - // this is root, can use reduce as most recent point - size_down_in = size_up_out = size_up_reduce; - } - // check if we finished the job of message passing - size_t nfinished = size_down_in; - // can pass message down to childs - for (int i = 0; i < nlink; ++i) { - if (i != parent_index) { - if (selecter.CheckWrite(links[i].sock)) { - if (!links[i].WriteFromArray(sendrecvbuf, size_down_in)) { - utils::Socket::Error("Send"); - } - } - nfinished = std::min(links[i].size_write, nfinished); - } - } - // check boundary condition - if (nfinished >= total_size) break; - } - } - /*! 
- * \brief broadcast data from root to all nodes - * \param sendrecvbuf_ buffer for both sending and recving data - * \param size the size of the data to be broadcasted - * \param root the root worker id to broadcast the data - */ - virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { - if (links.size() == 0) return; - // number of links - const int nlink = static_cast(links.size()); - // size of space already read from data - size_t size_in = 0; - // input link, -2 means unknown yet, -1 means this is root - int in_link = -2; - - // initialize the link statistics - for (int i = 0; i < nlink; ++i) { - links[i].ResetSize(); - } - // root have all the data - if (this->rank == root) { - size_in = total_size; - in_link = -1; - } - - // while we have not passed the messages out - while(true) { - selecter.Select(); - if (in_link == -2) { - // probe in-link - for (int i = 0; i < nlink; ++i) { - if (selecter.CheckRead(links[i].sock)) { - if (!links[i].ReadToArray(sendrecvbuf_, total_size)) { - utils::Socket::Error("Recv"); - } - size_in = links[i].size_read; - if (size_in != 0) { - in_link = i; break; - } - } - } - } else { - // read from in link - if (in_link >= 0 && selecter.CheckRead(links[in_link].sock)) { - if(!links[in_link].ReadToArray(sendrecvbuf_, total_size)) { - utils::Socket::Error("Recv"); - } - size_in = links[in_link].size_read; - } - } - size_t nfinished = total_size; - // send data to all out-link - for (int i = 0; i < nlink; ++i) { - if (i != in_link) { - if (selecter.CheckWrite(links[i].sock)) { - if (!links[i].WriteFromArray(sendrecvbuf_, size_in)) { - utils::Socket::Error("Send"); - } - } - nfinished = std::min(nfinished, links[i].size_write); - } - } - // check boundary condition - if (nfinished >= total_size) break; - } - } - virtual bool LoadCheckPoint(utils::ISerializable *p_model) { - return false; - } - virtual void CheckPoint(const utils::ISerializable &model) { - } - - private: - // an independent child record - struct 
LinkRecord { - public: - // socket to get data from/to link - utils::TCPSocket sock; - // size of data readed from link - size_t size_read; - // size of data sent to the link - size_t size_write; - // pointer to buffer head - char *buffer_head; - // buffer size, in bytes - size_t buffer_size; - // initialize buffer - inline void InitBuffer(size_t type_nbytes, size_t count, size_t reduce_buffer_size) { - size_t n = (type_nbytes * count + 7)/ 8; - buffer_.resize(std::min(reduce_buffer_size, n)); - // make sure align to type_nbytes - buffer_size = buffer_.size() * sizeof(uint64_t) / type_nbytes * type_nbytes; - utils::Assert(type_nbytes <= buffer_size, "too large type_nbytes=%lu, buffer_size=%lu", type_nbytes, buffer_size); - // set buffer head - buffer_head = reinterpret_cast(BeginPtr(buffer_)); - } - // reset the recv and sent size - inline void ResetSize(void) { - size_write = size_read = 0; - } - /*! - * \brief read data into ring-buffer, with care not to existing useful override data - * position after protect_start - * \param protect_start all data start from protect_start is still needed in buffer - * read shall not override this - * \return true if it is an successful read, false if there is some error happens, check errno - */ - inline bool ReadToRingBuffer(size_t protect_start) { - size_t ngap = size_read - protect_start; - utils::Assert(ngap <= buffer_size, "AllReduce: boundary check"); - size_t offset = size_read % buffer_size; - size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); - ssize_t len = sock.Recv(buffer_head + offset, nmax); - if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; - size_read += static_cast(len); - return true; - } - /*! 
- * \brief read data into array, - * this function can not be used together with ReadToRingBuffer - * a link can either read into the ring buffer, or existing array - * \param max_size maximum size of array - * \return true if it is an successful read, false if there is some error happens, check errno - */ - inline bool ReadToArray(void *recvbuf_, size_t max_size) { - char *p = static_cast(recvbuf_); - ssize_t len = sock.Recv(p + size_read, max_size - size_read); - if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; - size_read += static_cast(len); - return true; - } - /*! - * \brief write data in array to sock - * \param sendbuf_ head of array - * \param max_size maximum size of array - * \return true if it is an successful write, false if there is some error happens, check errno - */ - inline bool WriteFromArray(const void *sendbuf_, size_t max_size) { - const char *p = static_cast(sendbuf_); - ssize_t len = sock.Send(p + size_write, max_size - size_write); - if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; - size_write += static_cast(len); - return true; - } - - private: - // recv buffer to get data from child - // aligned with 64 bits, will be able to perform 64 bits operations freely - std::vector buffer_; - }; - //------------------ - // uri of current host, to be set by Init - std::string host_uri; - // uri of master - std::string master_uri; - // port of master address - int master_port; - // port of slave process - int slave_port, nport_trial; - // reduce buffer size - size_t reduce_buffer_size; - // current rank - int rank; - // world size - int world_size; - // index of parent link, can be -1, meaning this is root of the tree - int parent_index; - // sockets of all links - std::vector links; - // select helper - utils::SelectHelper selecter; - -}; - -// singleton sync manager -SyncManager manager; - -/*! 
\brief intiialize the synchronization module */ -void Init(int argc, char *argv[]) { - for (int i = 1; i < argc; ++i) { - char name[256], val[256]; - if (sscanf(argv[i], "%[^=]=%s", name, val) == 2) { - manager.SetParam(name, val); - } - } - manager.Init(); -} - -/*! \brief finalize syncrhonization module */ -void Finalize(void) { - manager.Shutdown(); -} -/*! \brief singleton method to get engine */ -IEngine *GetEngine(void) { - return &manager; -} - -} // namespace engine diff --git a/src/utils.h b/src/utils.h index 2c529c449..81bba7dfd 100644 --- a/src/utils.h +++ b/src/utils.h @@ -76,6 +76,9 @@ inline void HandleCheckError(const char *msg) { inline void HandlePrint(const char *msg) { printf("%s", msg); } +inline void HandleLogPrint(const char *msg) { + fprintf(stderr, "%s", msg); +} #else #ifndef ALLREDUCE_STRICT_CXX98_ // include declarations, some one must implement this @@ -101,6 +104,15 @@ inline void Printf(const char *fmt, ...) { va_end(args); HandlePrint(msg.c_str()); } +/*! \brief printf, print message to the console */ +inline void LogPrintf(const char *fmt, ...) { + std::string msg(kPrintBuffer, '\0'); + va_list args; + va_start(args, fmt); + vsnprintf(&msg[0], kPrintBuffer, fmt, args); + va_end(args); + HandleLogPrint(msg.c_str()); +} /*! \brief portable version of snprintf */ inline int SPrintf(char *buf, size_t size, const char *fmt, ...) 
{ va_list args; diff --git a/test/Makefile b/test/Makefile index c773fe45b..49aca06e1 100644 --- a/test/Makefile +++ b/test/Makefile @@ -12,23 +12,25 @@ endif # specify tensor path BIN = test_allreduce -OBJ = engine_robust.o engine_tcp.o +OBJ = engine_base.o engine_robust.o engine.o .PHONY: clean all all: $(BIN) $(MPIBIN) engine_tcp.o: ../src/engine_tcp.cpp ../src/*.h -engine_robust.o: ../src/engine_robust.cpp ../src/*.h -test_allreduce: test_allreduce.cpp ../src/*.h engine_robust.o +engine_base.o: ../src/engine_base.cc ../src/*.h +engine.o: ../src/engine.cc ../src/*.h +engine_robust.o: ../src/engine_robust.cc ../src/*.h +test_allreduce: test_allreduce.cpp ../src/*.h $(OBJ) $(BIN) : - $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) + $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(OBJ) : - $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) + $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) $(MPIBIN) : - $(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) + $(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) clean: $(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~ diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index 40c85ea0b..3a2cc2a9d 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -74,11 +74,11 @@ int main(int argc, char *argv[]) { test::Mock mock(rank, argv[2], argv[3]); - printf("[%d] start at %s\n", rank, name.c_str()); + utils::LogPrintf("[%d] start at %s\n", rank, name.c_str()); TestMax(mock, n); - printf("[%d] !!!TestMax pass\n", rank); + utils::LogPrintf("[%d] !!!TestMax pass\n", rank); TestSum(mock, n); - printf("[%d] !!!TestSum pass\n", rank); + utils::LogPrintf("[%d] !!!TestSum pass\n", rank); sync::Finalize(); printf("[%d] all check pass\n", rank); return 0; From 155ed3a81432bf01c75caa749ba6a01a08a9a4f3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 29 Nov 2014 22:22:51 -0800 Subject: [PATCH 014/531] seems a OK version of 
reset, start to work on decide exec --- src/engine_robust.cc | 63 +++++++++++++++++++++++++------------------- src/socket.h | 5 ++-- src/utils.h | 1 + 3 files changed, 40 insertions(+), 29 deletions(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index b969bc9f6..00efd7447 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -17,18 +17,14 @@ void AllReduceRobust::AllReduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) { - utils::LogPrintf("[%d] call AllReduce", rank); - TryResetLinks(); - utils::LogPrintf("[%d] start work", rank); while (true) { ReturnType ret = TryAllReduce(sendrecvbuf_, type_nbytes, count, reducer); if (ret == kSuccess) return; if (ret == kSockError) { utils::Error("error occur during all reduce\n"); } - utils::LogPrintf("[%d] receive except signal, start reset link", rank); + utils::LogPrintf("[%d] receive except signal, start reset link\n", rank); TryResetLinks(); - //utils::Check(TryResetLinks() == kSuccess, "error when reset links"); } // TODO } @@ -70,13 +66,13 @@ void AllReduceRobust::CheckPoint(const utils::ISerializable &model) { * and some link recovery proceduer is needed */ AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { - utils::LogPrintf("[%d] TryResetLinks, start\n", rank); // number of links const int nlink = static_cast(links.size()); for (int i = 0; i < nlink; ++i) { links[i].InitBuffer(sizeof(int), 1 << 10, reduce_buffer_size); links[i].ResetSize(); } + // read and discard data from all channels until pass mark while (true) { for (int i = 0; i < nlink; ++i) { @@ -92,20 +88,6 @@ AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { ssize_t len = links[i].sock.Send(&sig, sizeof(sig)); if (len == sizeof(sig)) links[i].size_write = 2; } - if (links[i].size_read == 0) { - int atmark = links[i].sock.AtMark(); - if (atmark < 0) { - utils::Assert(links[i].sock.BadSocket(), "must already gone bad"); - } else if (atmark > 0) { - links[i].size_read = 1; 
- } else { - printf("buffer_size=%lu\n", links[i].buffer_size); - // no at mark, read and discard data - ssize_t len = links[i].sock.Recv(links[i].buffer_head, links[i].buffer_size); - // zero length, remote closed the connection, close socket - if (len == 0) links[i].sock.Close(); - } - } } utils::SelectHelper rsel; bool finished = true; @@ -113,15 +95,44 @@ AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { if (links[i].size_write != 2 && !links[i].sock.BadSocket()) { rsel.WatchWrite(links[i].sock); finished = false; } - if (links[i].size_read == 0 && !links[i].sock.BadSocket()) { - rsel.WatchRead(links[i].sock); finished = false; - } } if (finished) break; // wait to read from the channels to discard data rsel.Select(); } - utils::LogPrintf("[%d] Finish discard data\n", rank); + for (int i = 0; i < nlink; ++i) { + if (!links[i].sock.BadSocket()) { + utils::SelectHelper::WaitExcept(links[i].sock); + } + } + while (true) { + for (int i = 0; i < nlink; ++i) { + if (links[i].size_read == 0) { + int atmark = links[i].sock.AtMark(); + if (atmark < 0) { + utils::Assert(links[i].sock.BadSocket(), "must already gone bad"); + } else if (atmark > 0) { + links[i].size_read = 1; + } else { + // no at mark, read and discard data + ssize_t len = links[i].sock.Recv(links[i].buffer_head, links[i].buffer_size); + if (links[i].sock.AtMark()) links[i].size_read = 1; + // zero length, remote closed the connection, close socket + if (len == 0) links[i].sock.Close(); + } + } + } + utils::SelectHelper rsel; + bool finished = true; + for (int i = 0; i < nlink; ++i) { + if (links[i].size_read == 0 && !links[i].sock.BadSocket()) { + rsel.WatchRead(links[i].sock); finished = false; + } + } + if (finished) break; + rsel.Select(); + } + // start synchronization, use blocking I/O to avoid select for (int i = 0; i < nlink; ++i) { if (!links[i].sock.BadSocket()) { @@ -132,7 +143,7 @@ AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { links[i].sock.Close(); 
continue; } else if (len > 0) { utils::Assert(oob_mark == kResetMark, "wrong oob msg"); - utils::Assert(!links[i].sock.AtMark(), "should already read past mark"); + utils::Assert(links[i].sock.AtMark() != 1, "should already read past mark"); } else { utils::Assert(errno != EAGAIN|| errno != EWOULDBLOCK, "BUG"); } @@ -147,7 +158,6 @@ AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { } } } - utils::LogPrintf("[%d] GGet all Acks\n", rank); // wait all ack for (int i = 0; i < nlink; ++i) { if (!links[i].sock.BadSocket()) { @@ -167,7 +177,6 @@ AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { for (int i = 0; i < nlink; ++i) { if (links[i].sock.BadSocket()) return kSockError; } - utils::LogPrintf("[%d] TryResetLinks,!! return\n", rank); return kSuccess; } diff --git a/src/socket.h b/src/socket.h index a5238a6c0..8f6d969e6 100644 --- a/src/socket.h +++ b/src/socket.h @@ -177,7 +177,7 @@ class Socket { inline bool BadSocket(void) const { if (IsClosed()) return true; int err = GetSockError(); - if (err == EBADF || err == EINTR) return true; + if (err == EBADF || err == EINTR) return true; return false; } /*! 
\brief check if socket is already closed */ @@ -250,7 +250,7 @@ class TCPSocket : public Socket{ int atmark; #ifdef _WIN32 if (ioctlsocket(sockfd, SIOCATMARK, &atmark) != NO_ERROR) return -1; -#else +#else if (ioctl(sockfd, SIOCATMARK, &atmark) == -1) return -1; #endif return atmark; @@ -418,6 +418,7 @@ struct SelectHelper { private: inline static int Select_(int maxfd, fd_set *rfds, fd_set *wfds, fd_set *efds, long timeout) { + utils::Assert(maxfd < FD_SETSIZE, "maxdf must be smaller than FDSETSIZE"); if (timeout == 0) { return select(maxfd, rfds, wfds, efds, NULL); } else { diff --git a/src/utils.h b/src/utils.h index 81bba7dfd..a371d6059 100644 --- a/src/utils.h +++ b/src/utils.h @@ -78,6 +78,7 @@ inline void HandlePrint(const char *msg) { } inline void HandleLogPrint(const char *msg) { fprintf(stderr, "%s", msg); + fflush(stderr); } #else #ifndef ALLREDUCE_STRICT_CXX98_ From 2e536eda29673d71ba4021a3c69e70c4e6158288 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 11:42:59 -0800 Subject: [PATCH 015/531] check in the recover strategy --- src/allreduce.h | 4 +- src/engine.h | 2 +- src/engine_base.cc | 14 ++-- src/engine_base.h | 15 +++-- src/engine_robust.cc | 156 +++++++++++++++++++++++++++++++++++++++++-- src/engine_robust.h | 75 +++++++++++++++++---- 6 files changed, 235 insertions(+), 31 deletions(-) diff --git a/src/allreduce.h b/src/allreduce.h index 264541211..c6dccade6 100644 --- a/src/allreduce.h +++ b/src/allreduce.h @@ -2,7 +2,9 @@ #define ALLREDUCE_H /*! * \file allreduce.h - * \brief This file defines a template wrapper of engine to ensure + * \brief This file defines a template wrapper of engine to give more flexible + * AllReduce operations + * * \author Tianqi Chen, Nacho, Tianyi */ #include "./engine.h" diff --git a/src/engine.h b/src/engine.h index 42e19c139..d3493945f 100644 --- a/src/engine.h +++ b/src/engine.h @@ -1,6 +1,6 @@ /*! 
* \file engine.h - * \brief This file defines the interface of allreduce library + * \brief This file defines the core interface of allreduce library * \author Tianqi Chen, Nacho, Tianyi */ #ifndef ALLREDUCE_ENGINE_H diff --git a/src/engine_base.cc b/src/engine_base.cc index e2eca014f..dd9c8ac56 100644 --- a/src/engine_base.cc +++ b/src/engine_base.cc @@ -1,3 +1,8 @@ +/*! + * \file engine_base.cc + * \brief Basic implementation of AllReduce + * \author Tianqi, Nacho, Tianyi + */ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX @@ -137,10 +142,8 @@ void AllReduceBase::SetParam(const char *name, const char *val) { * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function - * \return this function can return - * - kSuccess: allreduce is success, - * - kSockError: a neighbor node go down, the connection is dropped - * - kGetExcept: another node which is not my neighbor go down, get Out-of-Band exception notification from my neighbor + * \return this function can return kSuccess, kSockError, kGetExcept, see ReturnType for details + * \sa ReturnType */ AllReduceBase::ReturnType AllReduceBase::TryAllReduce(void *sendrecvbuf_, @@ -278,7 +281,8 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, * \param sendrecvbuf_ buffer for both sending and recving data * \param total_size the size of the data to be broadcasted * \param root the root worker id to broadcast the data - * \return this function can return three possible values, see detail in TryAllReduce + * \return this function can return kSuccess, kSockError, kGetExcept, see ReturnType for details + * \sa ReturnType */ AllReduceBase::ReturnType AllReduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { diff --git a/src/engine_base.h b/src/engine_base.h index 6c138529a..61fdd6033 100644 --- a/src/engine_base.h +++ b/src/engine_base.h @@ -97,8 +97,14 @@ class AllReduceBase : public IEngine 
{ protected: /*! \brief enumeration of possible returning results from Try functions */ enum ReturnType { + /*! \brief execution is successful */ kSuccess, + /*! \brief a neighbor node go down, the connection is dropped */ kSockError, + /*! + * \brief another node which is not my neighbor go down, + * get Out-of-Band exception notification from my neighbor + */ kGetExcept }; // link record to a neighbor @@ -202,10 +208,8 @@ class AllReduceBase : public IEngine { * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function - * \return this function can return - * - kSuccess: allreduce is success, - * - kSockError: a neighbor node go down, the connection is dropped - * - kGetExcept: another node which is not my neighbor go down, get Out-of-Band exception notification from my neighbor + * \return this function can return kSuccess, kSockError, kGetExcept, see ReturnType for details + * \sa ReturnType */ ReturnType TryAllReduce(void *sendrecvbuf_, size_t type_nbytes, @@ -216,7 +220,8 @@ class AllReduceBase : public IEngine { * \param sendrecvbuf_ buffer for both sending and recving data * \param size the size of the data to be broadcasted * \param root the root worker id to broadcast the data - * \return this function can return three possible values, see detail in TryAllReduce + * \return this function can return kSuccess, kSockError, kGetExcept, see ReturnType for details + * \sa ReturnType */ ReturnType TryBroadcast(void *sendrecvbuf_, size_t size, int root); //---- local data related to link ---- diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 00efd7447..fcc3ebc20 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -1,3 +1,8 @@ +/*! 
+ * \file engine_robust.cc + * \brief Robust implementation of AllReduce + * \author Tianqi, Nacho, Tianyi + */ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX @@ -71,8 +76,7 @@ AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { for (int i = 0; i < nlink; ++i) { links[i].InitBuffer(sizeof(int), 1 << 10, reduce_buffer_size); links[i].ResetSize(); - } - + } // read and discard data from all channels until pass mark while (true) { for (int i = 0; i < nlink; ++i) { @@ -179,12 +183,150 @@ AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { } return kSuccess; } - -bool AllReduceRobust::RecoverExec(void *sendrecvbuf_, size_t size, int flag, int seqno) { - if (flag != 0) { - utils::Assert(seqno == ActionSummary::kMaxSeq, "must only set seqno for normal operations"); +/*! + * \brief try to reconnect the broken links + * \return this function can kSuccess or kSockError + */ +AllReduceRobust::ReturnType AllReduceRobust::TryReConnectLinks(void) { + utils::Error("TryReConnectLinks: not implemented"); + return kSuccess; +} +/*! + * \brief if err_type indicates an error + * recover links according to the error type reported + * if there is no error, return true + * \param err_type the type of error happening in the system + * \return true if err_type is kSuccess, false otherwise + */ +bool AllReduceRobust::CheckAndRecover(ReturnType err_type) { + if (err_type == kSuccess) return true; + while(err_type != kSuccess) { + switch(err_type) { + case kGetExcept: err_type = TryResetLinks(); break; + case kSockError: { + TryResetLinks(); + err_type = TryReConnectLinks(); + break; + } + default: utils::Assert(false, "RecoverLinks: cannot reach here"); + } } - ActionSummary act(flag, seqno); + return false; +} +/*! 
+ * \brief try to load check point + * + * This is a collaborative function called by all nodes + * only the nodes with requester set to true really needs to load the check point + * other nodes acts as collaborative roles to complete this request + * + * \param requester whether current node is the requester + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType + */ +AllReduceRobust::ReturnType AllReduceRobust::TryLoadCheckPoint(bool requester) { + utils::Error("TryLoadCheckPoint: not implemented"); + return kSuccess; +} +/*! + * \brief try to get the result of operation specified by seqno + * + * This is a collaborative function called by all nodes + * only the nodes with requester set to true really needs to get the result + * other nodes acts as collaborative roles to complete this request + * + * \param buf the buffer to store the result, this parameter is only use when current node is requester + * \param size the total size of the buffer, this parameter is only use when current node is requester + * \param seqno sequence number of the operation, this is unique index of a operation in current iteration + * \param requester whether current node is the requester + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType + */ +AllReduceRobust::ReturnType AllReduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { + utils::Error("TryGetResult: not implemented"); + return kSuccess; +} +/*! 
+ * \brief try to run recover execution for a request action described by flag and seqno, + * the function will keep blocking to run possible recovery operations before the specified action, + * until the requested result is received by a recovering procedure, + * or the function discovers that the requested action is not yet executed, and return false + * + * \param buf the buffer to store the result + * \param size the total size of the buffer + * \param flag flag information about the action \sa ActionSummary + * \param seqno sequence number of the action, if it is special action with flag set, seqno needs to be set to ActionSummary::kMaxSeq + * + * \return if this function can return true or false + * - true means buf already set to the + * result by recovering procedure, the action is complete, no further action is needed + * - false means this is the lastest action that has not yet been executed, need to execute the action + */ +bool AllReduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { + if (flag != 0) { + utils::Assert(seqno == ActionSummary::kMaxSeq, "must only set seqno for normal operations"); + } + // request + ActionSummary req(flag, seqno); + while (true) { + // action + ActionSummary act = req; + // get the reduced action + if (!CheckAndRecover(TryAllReduce(&act, sizeof(act), 1, ActionSummary::Reducer))) continue; + if (act.check_ack()) { + if (act.check_point()) { + // if we also have check_point, do check point first + utils::Assert(!act.diff_seq(), + "check ack & check pt cannot occur together with normal ops"); + // if we requested checkpoint, we are free to go + if (req.check_point()) return true; + } else if (act.load_check()) { + // if there is only check_ack and load_check, do load_check + if (!CheckAndRecover(TryLoadCheckPoint(req.load_check()))) continue; + // if requested load check, then misson complete + if (req.load_check()) return true; + } else { + // there is no check point and no load check, execute check ack + 
if (req.check_ack()) return true; + } + // if execute to this point + // this means the action requested has not been completed + // try next round + } else { + if (act.check_point()) { + if (act.diff_seq()) { + utils::Assert(act.min_seqno() != ActionSummary::kMaxSeq, "min seq bug"); + bool requester = req.min_seqno() == act.min_seqno(); + if (!CheckAndRecover(TryGetResult(buf, size, act.min_seqno(), requester))) continue; + if (requester) return true; + } else { + // no difference in seq no, means we are free to check point + if (req.check_point()) return true; + } + } else { + // no check point + if (act.load_check()) { + // load check have higher priority, do load_check + if (!CheckAndRecover(TryLoadCheckPoint(req.load_check()))) continue; + // if requested load check, then misson complete + if (req.load_check()) return true; + } else { + // no special flags, no checkpoint, check ack, load_check + utils::Assert(act.min_seqno() != ActionSummary::kMaxSeq, "min seq bug"); + if (act.diff_seq()) { + bool requester = req.min_seqno() == act.min_seqno(); + if (!CheckAndRecover(TryGetResult(buf, size, act.min_seqno(), requester))) continue; + if (requester) return true; + } else { + // all the request is same, this is most recent command that is yet to be executed + return false; + } + } + } + // something is still incomplete try next round + } + } + utils::Assert(false, "RecoverExec: should not reach here"); return true; } } // namespace engine diff --git a/src/engine_robust.h b/src/engine_robust.h index f1949e11a..fa18406db 100644 --- a/src/engine_robust.h +++ b/src/engine_robust.h @@ -89,15 +89,19 @@ class AllReduceRobust : public AllReduceBase { inline int min_seqno(void) const { return seqcode >> 4; } + // whether the operation set contains a load_check + inline bool load_check(void) const { + return (seqcode & kLoadCheck) != 0; + } // whether the operation set contains a check point inline bool check_point(void) const { return (seqcode & kCheckPoint) != 0; } - // 
whether the operation set contains a check point + // whether the operation set contains a check ack inline bool check_ack(void) const { return (seqcode & kCheckAck) != 0; } - // whether the operation set contains a check point + // whether the operation set contains different sequence number inline bool diff_seq(void) const { return (seqcode & kDiffSeq) != 0; } @@ -184,17 +188,64 @@ class AllReduceRobust : public AllReduceBase { * when kSockError is returned, it simply means there are bad sockets in the links, * and some link recovery proceduer is needed */ - ReturnType TryResetLinks(void); - /*! - * \brief Run recovery execution of a action specified by flag and seqno, - * there can be two outcome of the function - * - * \param sendrecvbuf_ - * - * \return if this function returns true, this means - * behind and we will be able to recover data from existing node + ReturnType TryResetLinks(void); + /*! + * \brief try to reconnect the broken links + * \return this function can kSuccess or kSockError */ - bool RecoverExec(void *sendrecvbuf_, size_t size, int flag, int seqno); + ReturnType TryReConnectLinks(void); + /*! + * \brief if err_type indicates an error + * recover links according to the error type reported + * if there is no error, return true + * \param err_type the type of error happening in the system + * \return true if err_type is kSuccess, false otherwise + */ + bool CheckAndRecover(ReturnType err_type); + /*! 
+ * \brief try to run recover execution for a request action described by flag and seqno, + * the function will keep blocking to run possible recovery operations before the specified action, + * until the requested result is received by a recovering procedure, + * or the function discovers that the requested action is not yet executed, and return false + * + * \param buf the buffer to store the result + * \param size the total size of the buffer + * \param flag flag information about the action \sa ActionSummary + * \param seqno sequence number of the action, if it is special action with flag set, seqno needs to be set to ActionSummary::kMaxSeq + * + * \return if this function can return true or false + * - true means buf already set to the + * result by recovering procedure, the action is complete, no further action is needed + * - false means this is the lastest action that has not yet been executed, need to execute the action + */ + bool RecoverExec(void *buf, size_t size, int flag, int seqno = ActionSummary::kMaxSeq); + /*! + * \brief try to load check point + * + * This is a collaborative function called by all nodes + * only the nodes with requester set to true really needs to load the check point + * other nodes acts as collaborative roles to complete this request + * + * \param requester whether current node is the requester + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType + */ + ReturnType TryLoadCheckPoint(bool requester); + /*! 
+ * \brief try to get the result of operation specified by seqno + * + * This is a collaborative function called by all nodes + * only the nodes with requester set to true really needs to get the result + * other nodes acts as collaborative roles to complete this request + * + * \param buf the buffer to store the result, this parameter is only use when current node is requester + * \param size the total size of the buffer, this parameter is only use when current node is requester + * \param seqno sequence number of the operation, this is unique index of a operation in current iteration + * \param requester whether current node is the requester + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType + */ + ReturnType TryGetResult(void *buf, size_t size, int seqno, bool requester); //---- recovery data structure ---- // call sequence counter, records how many calls we made so far // from last call to CheckPoint, LoadCheckPoint From d1ce3c697caf843e4df8dd599bc993f543f18957 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 11:45:50 -0800 Subject: [PATCH 016/531] inline --- src/engine_robust.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index fcc3ebc20..3e9ec6c6c 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -255,7 +255,8 @@ AllReduceRobust::ReturnType AllReduceRobust::TryGetResult(void *sendrecvbuf, siz * \param buf the buffer to store the result * \param size the total size of the buffer * \param flag flag information about the action \sa ActionSummary - * \param seqno sequence number of the action, if it is special action with flag set, seqno needs to be set to ActionSummary::kMaxSeq + * \param seqno sequence number of the action, if it is special action with flag set, + * seqno needs to be set to ActionSummary::kMaxSeq * * \return if this function can return true or false * - true means buf already set to the 
From 68f13cd73969a9d0ac1b5be4fcc922244a88414c Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 11:46:21 -0800 Subject: [PATCH 017/531] tight --- src/engine_robust.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 3e9ec6c6c..50e1ec0a4 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -242,7 +242,8 @@ AllReduceRobust::ReturnType AllReduceRobust::TryLoadCheckPoint(bool requester) { * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details * \sa ReturnType */ -AllReduceRobust::ReturnType AllReduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { +AllReduceRobust::ReturnType +AllReduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { utils::Error("TryGetResult: not implemented"); return kSuccess; } From 7a60cb7f3e1ee44ce9b1519466245c94d4ff17ea Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 16:37:26 -0800 Subject: [PATCH 018/531] checkin decide request, todo message passing --- src/engine_robust.cc | 129 ++++++++++++++++++++++++++++++++++++++++++- src/engine_robust.h | 60 ++++++++++++++++++++ 2 files changed, 187 insertions(+), 2 deletions(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 50e1ec0a4..65a8c3aef 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -6,6 +6,8 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX +#include +#include #include "./utils.h" #include "./engine_robust.h" @@ -213,6 +215,127 @@ bool AllReduceRobust::CheckAndRecover(ReturnType err_type) { } return false; } +/*! 
+ * \brief message passing function, used to decide the + * shortest distance to the possible source of data + * \param node_value a pair of have_data and size + * have_data whether current node have data + * size gives the size of data, if current node is kHaveData + * \param dist_in the shorest to any data source distance in each direction + * \param out_index the edge index of output link + * \return the shorest distance result of out edge specified by out_index + */ +inline std::pair +ShortestDist(const std::pair &node_value, + const std::vector< std::pair > &dist_in, + size_t out_index) { + if (node_value.first) { + return std::make_pair(1, node_value.second); + } + size_t size = 0; + int res = std::numeric_limits::max(); + for (size_t i = 0; i < dist_in.size(); ++i) { + if (i == out_index) continue; + if (dist_in[i].first < res) { + res = dist_in[i].first; size = dist_in[i].second; + } + } + return std::make_pair(res, size); +} +/*! + * \brief message passing function, used to decide the + * data request from each edge, whether need to request data from certain edge + * \param node_value a pair of request_data and best_link + * request_data stores whether current node need to request data + * best_link gives the best edge index to fetch the data + * \param req_in the data request from incoming edges + * \param out_index the edge index of output link + * \return the request to the output edge + */ +inline bool DataRequest(const std::pair &node_value, + const std::vector &req_in, + size_t out_index) { + // whether current node need to request data + bool request_data = node_value.first; + // which edge index is the best link to request data + // can be -1, which means current node contains data + const int best_link = node_value.second; + if (static_cast(out_index) == best_link) { + if (request_data) return true; + for (size_t i = 0; i < req_in.size(); ++i) { + if (i == out_index) continue; + if (req_in[i]) return true; + } + } + return false; +} +/*! 
+ * \brief try to decide the recovery message passing request + * \param role the current role of the node + * \param p_req_outlink used to store the output link the + * current node should recv data from, + * this can be -1 or -2, + * -1 means current node have the data + * -2 means current node do not have data, but also do not need to send/recv data + * \param p_req_in used to store the resulting vector, indicating which link we should send the data to + * \param p_size used to store the size of the message, for node in state kHaveData, + * this size must be set correctly before calling the function + * for others, this surves as output parameter + * + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType + */ +AllReduceRobust::ReturnType +AllReduceRobust::TryDecideRequest(AllReduceRobust::RecoverType role, + int *p_req_outlink, + std::vector *p_req_in, + size_t *p_size) { + int best_link = -2; + {// get the shortest distance to the request point + std::vector< std::pair > dist_in, dist_out; + ReturnType succ = MsgPassing(std::make_pair(role == kHaveData, *p_size), + &dist_in, &dist_out, ShortestDist); + if (succ != kSuccess) return succ; + if (role != kHaveData) { + for (size_t i = 0; i < dist_in.size(); ++i) { + if (dist_in[i].first != std::numeric_limits::max()) { + utils::Check(best_link == -2 || *p_size == dist_in[i].second, + "AllReduce size inconsistent"); + if (best_link == -2 || dist_in[i].first < dist_in[best_link].first) { + best_link = static_cast(i); + *p_size = dist_in[i].second; + } + } + } + utils::Check(best_link != -2, "Too many nodes went down and we cannot recover.."); + } else { + best_link = -1; + } + } + // get the node request + std::vector &req_in = *p_req_in; + std::vector req_out; + ReturnType succ = MsgPassing(std::make_pair(role == kRequestData, best_link), + &req_in, &req_out, DataRequest); + if (succ != kSuccess) return succ; + bool need_recv = false; + for (size_t i = 0; i 
< req_in.size(); ++i) { + if (req_out[i]) { + utils::Assert(!req_in[i], "cannot get and receive request"); + utils::Assert(static_cast(i) == best_link, "request result inconsistent"); + need_recv = true; + } + } + if (role == kPassData && !need_recv) { + for (size_t i = 0; i < req_in.size(); ++i) { + utils::Assert(!req_in[i], "Bug in TryDecideRequest"); + } + *p_req_outlink = 2; + } else { + *p_req_outlink = best_link; + } + return kSuccess; +} /*! * \brief try to load check point * @@ -225,7 +348,7 @@ bool AllReduceRobust::CheckAndRecover(ReturnType err_type) { * \sa ReturnType */ AllReduceRobust::ReturnType AllReduceRobust::TryLoadCheckPoint(bool requester) { - utils::Error("TryLoadCheckPoint: not implemented"); + return kSuccess; } /*! @@ -308,10 +431,12 @@ bool AllReduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { } else { // no check point if (act.load_check()) { + // all the nodes called load_check, this is an incomplete action + if (!act.diff_seq()) return false; // load check have higher priority, do load_check if (!CheckAndRecover(TryLoadCheckPoint(req.load_check()))) continue; // if requested load check, then misson complete - if (req.load_check()) return true; + if (req.load_check()) return true; } else { // no special flags, no checkpoint, check ack, load_check utils::Assert(act.min_seqno() != ActionSummary::kMaxSeq, "min seq bug"); diff --git a/src/engine_robust.h b/src/engine_robust.h index fa18406db..e6312d7ce 100644 --- a/src/engine_robust.h +++ b/src/engine_robust.h @@ -9,6 +9,7 @@ */ #ifndef ALLREDUCE_ENGINE_ROBUST_H #define ALLREDUCE_ENGINE_ROBUST_H +#include #include "./engine.h" #include "./engine_base.h" @@ -57,6 +58,15 @@ class AllReduceRobust : public AllReduceBase { const static char kResetMark = 97; // and mark for channel cleanup const static char kResetAck = 97; + /*! \brief type of roles each node can play during recovery */ + enum RecoverType { + /*! \brief current node have data */ + kHaveData, + /*! 
\brief current node request data */ + kRequestData, + /*! \brief current node only helps to pass data around */ + kPassData + }; /*! * \brief summary of actions proposed in all nodes * this data structure is used to make consensus decision @@ -246,6 +256,53 @@ class AllReduceRobust : public AllReduceBase { * \sa ReturnType */ ReturnType TryGetResult(void *buf, size_t size, int seqno, bool requester); + /*! + * \brief try to decide the recovery message passing request + * \param role the current role of the node + * \param p_req_outlink used to store the output link the + * current node should recv data from, + * this can be nonnegative value, -1 or -2, + * -1 means current node have the data + * -2 means current node do not have data, but also do not need to send/recv data + * \param p_req_in used to store the resulting vector, indicating which link we should send the data to + * \param p_size used to store the size of the message, for node in state kHaveData, + * this size must be set correctly before calling the function + * for others, this surves as output parameter + * + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType + */ + ReturnType TryDecideRequest(RecoverType role, + int *p_req_outlink, + std::vector *p_req_in, + size_t *p_size); + /*! 
+ * \brief run message passing algorithm on the allreduce tree + * the result is edge message stored in p_edge_in and p_edge_out + * \param node_value the value associated with current node + * \param p_edge_in used to store input message from each of the edge + * \param p_edge_out used to store output message from each of the edge + * \param func a function that defines the message passing rule + * Parameters of func: + * - node_value same as node_value in the main function + * - edge_in the array of input messages from each edge, + * this includes the output edge, which should be excluded + * - out_index array the index of output edge, the function should + * exclude the output edge when compute the message passing value + * Return of func: + * the function returns the output message based on the input message and node_value + * + * \tparam EdgeType type of edge message, must be simple struct + * \tparam NodeType type of node value + */ + template + inline ReturnType MsgPassing(const NodeType &node_value, + std::vector *p_edge_in, + std::vector *p_edge_out, + EdgeType (*func) (const NodeType &node_value, + const std::vector &edge_in, + size_t out_index) + ); //---- recovery data structure ---- // call sequence counter, records how many calls we made so far // from last call to CheckPoint, LoadCheckPoint @@ -254,4 +311,7 @@ class AllReduceRobust : public AllReduceBase { ResultBuffer resbuf; }; } // namespace engine +// implementation of inline template function +#include "./engine_robust-inl.h" + #endif // ALLREDUCE_ENGINE_ROBUST_H From 38cd5952353157635b2a546b45af5bf1fdcd8db3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 16:38:47 -0800 Subject: [PATCH 019/531] check in message passing --- src/engine_robust-inl.h | 42 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 src/engine_robust-inl.h diff --git a/src/engine_robust-inl.h b/src/engine_robust-inl.h new file mode 100644 index 000000000..837ffd40c --- 
/dev/null +++ b/src/engine_robust-inl.h @@ -0,0 +1,42 @@ +/*! + * \file engine_robust-inl.h + * \brief implementation of inline template function in AllReduceRobust + * + * \author Tianqi, Nacho, Tianyi + */ +#ifndef ALLREDUCE_ENGINE_ROBUST_INL_H +#define ALLREDUCE_ENGINE_ROBUST_INL_H + +namespace engine { +/*! + * \brief run message passing algorithm on the allreduce tree + * the result is edge message stored in p_edge_in and p_edge_out + * \param node_value the value associated with current node + * \param p_edge_in used to store input message from each of the edge + * \param p_edge_out used to store output message from each of the edge + * \param func a function that defines the message passing rule + * Parameters of func: + * - node_value same as node_value in the main function + * - edge_in the array of input messages from each edge, + * this includes the output edge, which should be excluded + * - out_index array the index of output edge, the function should + * exclude the output edge when compute the message passing value + * Return of func: + * the function returns the output message based on the input message and node_value + * + * \tparam EdgeType type of edge message, must be simple struct + * \tparam NodeType type of node value + */ +template +inline AllReduceRobust::ReturnType +AllReduceRobust::MsgPassing(const NodeType &node_value, + std::vector *p_edge_in, + std::vector *p_edge_out, + EdgeType (*func) (const NodeType &node_value, + const std::vector &edge_in, + size_t out_index) + ) { + return kSuccess; +} +} // namespace engine +#endif // ALLREDUCE_ENGINE_ROBUST_INL_H From d8d648549f73b2446f545b4e47a5eb1677175eee Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 17:40:30 -0800 Subject: [PATCH 020/531] finish message passing, do a review on msg passing and decide --- src/engine_robust-inl.h | 107 ++++++++++++++++++++++++++++++++++++++++ src/engine_robust.cc | 19 ++++--- 2 files changed, 118 insertions(+), 8 deletions(-) diff --git 
a/src/engine_robust-inl.h b/src/engine_robust-inl.h index 837ffd40c..11f458623 100644 --- a/src/engine_robust-inl.h +++ b/src/engine_robust-inl.h @@ -36,6 +36,113 @@ AllReduceRobust::MsgPassing(const NodeType &node_value, const std::vector &edge_in, size_t out_index) ) { + if (links.size() == 0) return kSuccess; + // number of links + const int nlink = static_cast(links.size()); + // initialize the pointers + for (int i = 0; i < nlink; ++i) { + links[i].ResetSize(); + } + std::vector &edge_in = *p_edge_in; + std::vector &edge_out = *p_edge_out; + edge_in.resize(nlink); + edge_out.resize(nlink); + // stages in the process + // 0: recv messages from childs + // 1: send message to parent + // 2: recv message from parent + // 3: send message to childs + int stage = 0; + // if no childs, no need to reduce + if (nlink == static_cast(parent_index != -1)) { + stage = 1; + } + // while we have not passed the messages out + while (true) { + // for node with no parent, directly do stage 3 + if (parent_index == -1) { + utils::Assert(stage != 2 && stage != 1, "invalie stage id"); + } + // select helper + utils::SelectHelper selecter; + for (int i = 0; i < nlink; ++i) { + selecter.WatchException(links[i].sock); + switch (stage) { + case 0: + if (i != parent_index && links[i].size_read != sizeof(EdgeType)) { + selecter.WatchRead(links[i].sock); + } + break; + case 1: if (i == parent_index) selecter.WatchWrite(links[i].sock); break; + case 2: if (i == parent_index) selecter.WatchRead(links[i].sock); break; + case 3: + if (i != parent_index && links[i].size_write != sizeof(EdgeType)) { + selecter.WatchWrite(links[i].sock); + } + break; + default: utils::Error("invalid stage"); + } + } + // select must return + selecter.Select(); + // exception handling + for (int i = 0; i < nlink; ++i) { + // recive OOB message from some link + if (selecter.CheckExcept(links[i].sock)) return kGetExcept; + } + if (stage == 0) { + bool finished = true; + // read data from childs + for (int i = 0; i < 
nlink; ++i) { + if (i != parent_index) { + if (selecter.CheckRead(links[i].sock)) { + if (!links[i].ReadToArray(&edge_in[i], sizeof(EdgeType))) return kSockError; + } + if (links[i].size_read != sizeof(EdgeType)) finished = false; + } + } + // if no parent, jump to stage 3, otherwise do stage 1 + if (finished) { + if (parent_index != -1) { + edge_out[parent_index] = func(node_value, edge_in, parent_index); + stage = 1; + } else { + for (int i = 0; i < nlink; ++i) { + edge_out[i] = func(node_value, edge_in, i); + } + stage = 3; + } + } + } + if (stage == 1) { + const int pid = this->parent_index; + utils::Assert(pid != -1, "MsgPassing invalid stage"); + if (!links[pid].WriteFromArray(&edge_out[pid], sizeof(EdgeType))) return kSockError; + if (links[pid].size_write == sizeof(EdgeType)) stage = 2; + } + if (stage == 2) { + const int pid = this->parent_index; + utils::Assert(pid != -1, "MsgPassing invalid stage"); + if (!links[pid].ReadToArray(&edge_in[pid], sizeof(EdgeType))) return kSockError; + if (links[pid].size_read == sizeof(EdgeType)) { + for (int i = 0; i < nlink; ++i) { + if (i != pid) edge_out[i] = func(node_value, edge_in, i); + } + stage = 3; + } + } + if (stage == 3) { + bool finished = true; + for (int i = 0; i < nlink; ++i) { + if (i != parent_index && links[i].size_write != sizeof(EdgeType)) { + if (!links[i].WriteFromArray(&edge_out[i], sizeof(EdgeType))) return kSockError; + if (links[i].size_write != sizeof(EdgeType)) finished = false; + } + } + // finish all the stages + if (finished) break; + } + } return kSuccess; } } // namespace engine diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 65a8c3aef..00ed26b80 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -252,8 +252,8 @@ ShortestDist(const std::pair &node_value, * \param out_index the edge index of output link * \return the request to the output edge */ -inline bool DataRequest(const std::pair &node_value, - const std::vector &req_in, +inline char DataRequest(const 
std::pair &node_value, + const std::vector &req_in, size_t out_index) { // whether current node need to request data bool request_data = node_value.first; @@ -261,13 +261,13 @@ inline bool DataRequest(const std::pair &node_value, // can be -1, which means current node contains data const int best_link = node_value.second; if (static_cast(out_index) == best_link) { - if (request_data) return true; + if (request_data) return 1; for (size_t i = 0; i < req_in.size(); ++i) { if (i == out_index) continue; - if (req_in[i]) return true; + if (req_in[i] != 0) return 1; } } - return false; + return 0; } /*! * \brief try to decide the recovery message passing request @@ -313,13 +313,16 @@ AllReduceRobust::TryDecideRequest(AllReduceRobust::RecoverType role, } } // get the node request - std::vector &req_in = *p_req_in; - std::vector req_out; + std::vector req_in, req_out; ReturnType succ = MsgPassing(std::make_pair(role == kRequestData, best_link), &req_in, &req_out, DataRequest); if (succ != kSuccess) return succ; bool need_recv = false; + // set p_req_in + p_req_in->resize(req_in.size()); for (size_t i = 0; i < req_in.size(); ++i) { + // set p_req_in + (*p_req_in)[i] = (req_in[i] != 0); if (req_out[i]) { utils::Assert(!req_in[i], "cannot get and receive request"); utils::Assert(static_cast(i) == best_link, "request result inconsistent"); @@ -331,7 +334,7 @@ AllReduceRobust::TryDecideRequest(AllReduceRobust::RecoverType role, utils::Assert(!req_in[i], "Bug in TryDecideRequest"); } *p_req_outlink = 2; - } else { + } else { *p_req_outlink = best_link; } return kSuccess; From 27f6f8ea9eef60bf3b3d23e6e97c90fde820b639 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 17:42:18 -0800 Subject: [PATCH 021/531] bugfix in msg passing --- src/engine_robust-inl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/engine_robust-inl.h b/src/engine_robust-inl.h index 11f458623..5eb30625d 100644 --- a/src/engine_robust-inl.h +++ b/src/engine_robust-inl.h @@ 
-53,8 +53,10 @@ AllReduceRobust::MsgPassing(const NodeType &node_value, // 2: recv message from parent // 3: send message to childs int stage = 0; - // if no childs, no need to reduce + // if no childs, no need to, directly start passing message if (nlink == static_cast(parent_index != -1)) { + utils::Assert(parent_index == 0, "parent must be 0"); + edge_out[parent_index] = func(node_value, edge_in, parent_index); stage = 1; } // while we have not passed the messages out From 4a6c01c83cc1b21ba50121361ba5ab03a56702f1 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 17:48:02 -0800 Subject: [PATCH 022/531] minor change in decide --- src/engine_robust.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 00ed26b80..049fbda13 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -323,15 +323,15 @@ AllReduceRobust::TryDecideRequest(AllReduceRobust::RecoverType role, for (size_t i = 0; i < req_in.size(); ++i) { // set p_req_in (*p_req_in)[i] = (req_in[i] != 0); - if (req_out[i]) { - utils::Assert(!req_in[i], "cannot get and receive request"); + if (req_out[i] != 0) { + utils::Assert(req_in[i] == 0, "cannot get and receive request"); utils::Assert(static_cast(i) == best_link, "request result inconsistent"); need_recv = true; } } if (role == kPassData && !need_recv) { for (size_t i = 0; i < req_in.size(); ++i) { - utils::Assert(!req_in[i], "Bug in TryDecideRequest"); + utils::Assert(req_in[i] == 0, "Bug in TryDecideRequest"); } *p_req_outlink = 2; } else { From b9b58a12754a73cd8099bbe3828b20ff4cea1e6b Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 17:48:30 -0800 Subject: [PATCH 023/531] bugfix in decide --- src/engine_robust.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 049fbda13..de7744b23 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -333,7 +333,7 @@ 
AllReduceRobust::TryDecideRequest(AllReduceRobust::RecoverType role, for (size_t i = 0; i < req_in.size(); ++i) { utils::Assert(req_in[i] == 0, "Bug in TryDecideRequest"); } - *p_req_outlink = 2; + *p_req_outlink = -2; } else { *p_req_outlink = best_link; } From ecb09a23bc4899ada5a24487115257ff7ee0ecf3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 20:59:55 -0800 Subject: [PATCH 024/531] add recover data, do a round of review --- src/allreduce.h | 2 +- src/engine.cc | 2 +- src/engine_base.cc | 3 +- src/engine_base.h | 8 +-- src/engine_robust-inl.h | 2 +- src/engine_robust.cc | 135 ++++++++++++++++++++++++++++++++++------ src/engine_robust.h | 44 +++++++++---- 7 files changed, 156 insertions(+), 40 deletions(-) diff --git a/src/allreduce.h b/src/allreduce.h index c6dccade6..3f389a591 100644 --- a/src/allreduce.h +++ b/src/allreduce.h @@ -5,7 +5,7 @@ * \brief This file defines a template wrapper of engine to give more flexible * AllReduce operations * - * \author Tianqi Chen, Nacho, Tianyi + * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ #include "./engine.h" diff --git a/src/engine.cc b/src/engine.cc index 17aacd5cf..375f8e770 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -3,7 +3,7 @@ * \brief this file governs which implementation of engine we are actually using * provides an singleton of engine interface * - * \author Tianqi, Nacho, Tianyi + * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE diff --git a/src/engine_base.cc b/src/engine_base.cc index dd9c8ac56..fb6e683ae 100644 --- a/src/engine_base.cc +++ b/src/engine_base.cc @@ -1,7 +1,8 @@ /*! 
* \file engine_base.cc * \brief Basic implementation of AllReduce - * \author Tianqi, Nacho, Tianyi + * + * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE diff --git a/src/engine_base.h b/src/engine_base.h index 61fdd6033..582cf5e17 100644 --- a/src/engine_base.h +++ b/src/engine_base.h @@ -5,8 +5,8 @@ * * This implementation provides basic utility of AllReduce and Broadcast * without considering node failure - * - * \author Tianqi, Nacho, Tianyi + * + * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ #ifndef ALLREDUCE_ENGINE_BASE_H #define ALLREDUCE_ENGINE_BASE_H @@ -136,7 +136,7 @@ class AllReduceBase : public IEngine { inline void ResetSize(void) { size_write = size_read = 0; } - /*! + /*! * \brief read data into ring-buffer, with care not to existing useful override data * position after protect_start * \param protect_start all data start from protect_start is still needed in buffer @@ -157,7 +157,7 @@ class AllReduceBase : public IEngine { if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; size_read += static_cast(len); return true; - } + } /*! * \brief read data into array, * this function can not be used together with ReadToRingBuffer diff --git a/src/engine_robust-inl.h b/src/engine_robust-inl.h index 5eb30625d..2817d4c0a 100644 --- a/src/engine_robust-inl.h +++ b/src/engine_robust-inl.h @@ -2,7 +2,7 @@ * \file engine_robust-inl.h * \brief implementation of inline template function in AllReduceRobust * - * \author Tianqi, Nacho, Tianyi + * \author Tianqi Chen */ #ifndef ALLREDUCE_ENGINE_ROBUST_INL_H #define ALLREDUCE_ENGINE_ROBUST_INL_H diff --git a/src/engine_robust.cc b/src/engine_robust.cc index de7744b23..dbc48f406 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -1,7 +1,8 @@ /*! 
* \file engine_robust.cc * \brief Robust implementation of AllReduce - * \author Tianqi, Nacho, Tianyi + * + * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE @@ -272,24 +273,22 @@ inline char DataRequest(const std::pair &node_value, /*! * \brief try to decide the recovery message passing request * \param role the current role of the node - * \param p_req_outlink used to store the output link the - * current node should recv data from, - * this can be -1 or -2, - * -1 means current node have the data - * -2 means current node do not have data, but also do not need to send/recv data - * \param p_req_in used to store the resulting vector, indicating which link we should send the data to * \param p_size used to store the size of the message, for node in state kHaveData, * this size must be set correctly before calling the function * for others, this surves as output parameter * + * \param p_recvlink used to store the link current node should recv data from, if necessary + * this can be -1, which means current node have the data + * \param p_req_in used to store the resulting vector, indicating which link we should send the data to + * * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details * \sa ReturnType */ AllReduceRobust::ReturnType -AllReduceRobust::TryDecideRequest(AllReduceRobust::RecoverType role, - int *p_req_outlink, - std::vector *p_req_in, - size_t *p_size) { +AllReduceRobust::TryDecideRouting(AllReduceRobust::RecoverType role, + size_t *p_size, + int *p_recvlink, + std::vector *p_req_in) { int best_link = -2; {// get the shortest distance to the request point std::vector< std::pair > dist_in, dist_out; @@ -317,7 +316,6 @@ AllReduceRobust::TryDecideRequest(AllReduceRobust::RecoverType role, ReturnType succ = MsgPassing(std::make_pair(role == kRequestData, best_link), &req_in, &req_out, DataRequest); if (succ != kSuccess) return succ; - bool need_recv = 
false; // set p_req_in p_req_in->resize(req_in.size()); for (size_t i = 0; i < req_in.size(); ++i) { @@ -326,16 +324,115 @@ AllReduceRobust::TryDecideRequest(AllReduceRobust::RecoverType role, if (req_out[i] != 0) { utils::Assert(req_in[i] == 0, "cannot get and receive request"); utils::Assert(static_cast(i) == best_link, "request result inconsistent"); - need_recv = true; } } - if (role == kPassData && !need_recv) { - for (size_t i = 0; i < req_in.size(); ++i) { - utils::Assert(req_in[i] == 0, "Bug in TryDecideRequest"); + *p_recvlink = best_link; + return kSuccess; +} + +/*! + * \brief try to finish the data recovery request, + * this function is used together with TryDecideRouting + * \param role the current role of the node + * \param sendrecvbuf_ the buffer to store the data to be sent/recived + * - if the role is kHaveData, this stores the data to be sent + * - if the role is kRequestData, this is the buffer to store the result + * - if the role is kPassData, this will not be used, and can be NULL + * \param size the size of the data, obtained from TryDecideRouting + * \param recv_link the link index to receive data, if necessary, obtained from TryDecideRouting + * \param req_in the request of each link to send data, obtained from TryDecideRouting + * + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType, TryDecideRouting + */ +AllReduceRobust::ReturnType +AllReduceRobust::TryRecoverData(RecoverType role, + void *sendrecvbuf_, + size_t size, + int recv_link, + const std::vector &req_in) { + // no need to run recovery for zero size message + if (size == 0) return kSuccess; + utils::Assert(req_in.size() == links.size(), "TryRecoverData"); + const int nlink = static_cast(links.size()); + { + bool req_data = role == kRequestData; + for (int i = 0; i < nlink; ++i) { + if (req_in[i]) { + utils::Assert(i != recv_link, "TryDecideRouting"); + req_data = true; + } + } + // do not need to provide data or 
receive data, directly exit + if (!req_data) return kSuccess; + } + for (int i = 0; i < nlink; ++i) { + links[i].ResetSize(); + } + utils::Assert(recv_link >= 0 || role == kHaveData, "recv_link must be active"); + if (role == kPassData) { + links[recv_link].InitBuffer(1, size, reduce_buffer_size); + } + while (true) { + bool finished = true; + utils::SelectHelper selecter; + for (int i = 0; i < nlink; ++i) { + if (i == recv_link && links[i].size_read != size) { + selecter.WatchRead(links[i].sock); + finished = false; + } + if (req_in[i] && links[i].size_write != size) { + selecter.WatchWrite(links[i].sock); + finished = false; + } + selecter.WatchException(links[i].sock); + } + if (finished) break; + selecter.Select(); + if (role == kRequestData) { + const int pid = recv_link; + if (selecter.CheckRead(links[pid].sock)) { + if(!links[pid].ReadToArray(sendrecvbuf_, size)) return kSockError; + } + for (int i = 0; i < nlink; ++i) { + if (req_in[i] && links[i].size_write != links[pid].size_read && + selecter.CheckWrite(links[i].sock)) { + if(!links[i].WriteFromArray(sendrecvbuf_, links[pid].size_read)) return kSockError; + } + } + } + if (role == kHaveData) { + for (int i = 0; i < nlink; ++i) { + if (req_in[i] && selecter.CheckWrite(links[i].sock)) { + if(!links[i].WriteFromArray(sendrecvbuf_, size)) return kSockError; + } + } + } + if (role == kPassData) { + const int pid = recv_link; + const size_t buffer_size = links[pid].buffer_size; + if (selecter.CheckRead(links[pid].sock)) { + size_t min_write = size; + for (int i = 0; i < nlink; ++i) { + if (req_in[i]) min_write = std::min(links[i].size_write, min_write); + } + utils::Assert(min_write <= links[pid].size_read, "boundary check"); + if (!links[pid].ReadToRingBuffer(min_write)) return kSockError; + } + for (int i = 0; i < nlink; ++i) { + if (req_in[i] && selecter.CheckWrite(links[i].sock)) { + size_t start = links[i].size_write % buffer_size; + // send out data from ring buffer + size_t nwrite = std::min(buffer_size 
- start, links[pid].size_read - links[i].size_write); + ssize_t len = links[pid].sock.Send(links[pid].buffer_head + start, nwrite); + if (len != -1) { + links[i].size_write += len; + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + } + } + } } - *p_req_outlink = -2; - } else { - *p_req_outlink = best_link; } return kSuccess; } diff --git a/src/engine_robust.h b/src/engine_robust.h index e6312d7ce..be9cf0998 100644 --- a/src/engine_robust.h +++ b/src/engine_robust.h @@ -5,7 +5,7 @@ * * This implementation considers the failure of nodes * - * \author Tianqi, Nacho, Tianyi + * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ #ifndef ALLREDUCE_ENGINE_ROBUST_H #define ALLREDUCE_ENGINE_ROBUST_H @@ -257,25 +257,43 @@ class AllReduceRobust : public AllReduceBase { */ ReturnType TryGetResult(void *buf, size_t size, int seqno, bool requester); /*! - * \brief try to decide the recovery message passing request + * \brief try to decide the routing strategy for recovery * \param role the current role of the node - * \param p_req_outlink used to store the output link the - * current node should recv data from, - * this can be nonnegative value, -1 or -2, - * -1 means current node have the data - * -2 means current node do not have data, but also do not need to send/recv data - * \param p_req_in used to store the resulting vector, indicating which link we should send the data to * \param p_size used to store the size of the message, for node in state kHaveData, * this size must be set correctly before calling the function * for others, this surves as output parameter + + * \param p_recvlink used to store the link current node should recv data from, if necessary + * this can be -1, which means current node have the data + * \param p_req_in used to store the resulting vector, indicating which link we should send the data to * * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details - * \sa ReturnType + * \sa 
ReturnType, TryRecoverData + */ + ReturnType TryDecideRouting(RecoverType role, + size_t *p_size, + int *p_recvlink, + std::vector *p_req_in); + /*! + * \brief try to finish the data recovery request, + * this function is used together with TryDecideRouting + * \param role the current role of the node + * \param sendrecvbuf_ the buffer to store the data to be sent/recived + * - if the role is kHaveData, this stores the data to be sent + * - if the role is kRequestData, this is the buffer to store the result + * - if the role is kPassData, this will not be used, and can be NULL + * \param size the size of the data, obtained from TryDecideRouting + * \param recv_link the link index to receive data, if necessary, obtained from TryDecideRouting + * \param req_in the request of each link to send data, obtained from TryDecideRouting + * + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType, TryDecideRouting */ - ReturnType TryDecideRequest(RecoverType role, - int *p_req_outlink, - std::vector *p_req_in, - size_t *p_size); + ReturnType TryRecoverData(RecoverType role, + void *sendrecvbuf_, + size_t size, + int recv_link, + const std::vector &req_in); /*! 
* \brief run message passing algorithm on the allreduce tree * the result is edge message stored in p_edge_in and p_edge_out From f7928c68a31c69f907d010057164630c716755d9 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 21:07:34 -0800 Subject: [PATCH 025/531] next round try more careful select design --- src/engine_base.cc | 4 ++-- src/engine_robust-inl.h | 9 ++++----- src/engine_robust.cc | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/engine_base.cc b/src/engine_base.cc index fb6e683ae..9f0aaa405 100644 --- a/src/engine_base.cc +++ b/src/engine_base.cc @@ -151,7 +151,7 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) { - if (links.size() == 0) return kSuccess; + if (links.size() == 0 || count == 0) return kSuccess; // total size of message const size_t total_size = type_nbytes * count; // number of links @@ -287,7 +287,7 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, */ AllReduceBase::ReturnType AllReduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { - if (links.size() == 0) return kSuccess; + if (links.size() == 0 || total_size == 0) return kSuccess; // number of links const int nlink = static_cast(links.size()); // size of space already read from data diff --git a/src/engine_robust-inl.h b/src/engine_robust-inl.h index 2817d4c0a..42558a750 100644 --- a/src/engine_robust-inl.h +++ b/src/engine_robust-inl.h @@ -67,6 +67,7 @@ AllReduceRobust::MsgPassing(const NodeType &node_value, } // select helper utils::SelectHelper selecter; + bool done = (stage == 3); for (int i = 0; i < nlink; ++i) { selecter.WatchException(links[i].sock); switch (stage) { @@ -80,12 +81,14 @@ AllReduceRobust::MsgPassing(const NodeType &node_value, case 3: if (i != parent_index && links[i].size_write != sizeof(EdgeType)) { selecter.WatchWrite(links[i].sock); + done = false; } break; default: utils::Error("invalid stage"); } } - // select must return + // finish all the 
stages, and write out message + if (done) break; selecter.Select(); // exception handling for (int i = 0; i < nlink; ++i) { @@ -134,15 +137,11 @@ AllReduceRobust::MsgPassing(const NodeType &node_value, } } if (stage == 3) { - bool finished = true; for (int i = 0; i < nlink; ++i) { if (i != parent_index && links[i].size_write != sizeof(EdgeType)) { if (!links[i].WriteFromArray(&edge_out[i], sizeof(EdgeType))) return kSockError; - if (links[i].size_write != sizeof(EdgeType)) finished = false; } } - // finish all the stages - if (finished) break; } } return kSuccess; diff --git a/src/engine_robust.cc b/src/engine_robust.cc index dbc48f406..7f510d2f3 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -352,7 +352,7 @@ AllReduceRobust::TryRecoverData(RecoverType role, int recv_link, const std::vector &req_in) { // no need to run recovery for zero size message - if (size == 0) return kSuccess; + if (links.size() == 0 || size == 0) return kSuccess; utils::Assert(req_in.size() == links.size(), "TryRecoverData"); const int nlink = static_cast(links.size()); { From 8cef2086f503ba2448500fae972673a9727ee03a Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 21:31:45 -0800 Subject: [PATCH 026/531] smarter select for allreduce and bcast --- src/engine_base.cc | 62 ++++++++++++++++++++++++++--------------- src/engine_robust.cc | 2 +- test/test_allreduce.cpp | 5 ++++ 3 files changed, 45 insertions(+), 24 deletions(-) diff --git a/src/engine_base.cc b/src/engine_base.cc index 9f0aaa405..00ac1cffb 100644 --- a/src/engine_base.cc +++ b/src/engine_base.cc @@ -179,12 +179,28 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, // while we have not passed the messages out while (true) { // select helper + bool finished = true; utils::SelectHelper selecter; - for (size_t i = 0; i < links.size(); ++i) { - selecter.WatchRead(links[i].sock); - selecter.WatchWrite(links[i].sock); + for (int i = 0; i < nlink; ++i) { + if (i == parent_index) { + if (size_down_in != 
total_size) { + selecter.WatchRead(links[i].sock); finished = false; + } + if (size_up_out != total_size) { + selecter.WatchWrite(links[i].sock); + } + } else { + if (links[i].size_read != total_size) { + selecter.WatchRead(links[i].sock); + } + if (links[i].size_write != total_size) { + selecter.WatchWrite(links[i].sock); finished = false; + } + } selecter.WatchException(links[i].sock); } + // finish runing allreduce + if (finished) break; // select must return selecter.Select(); // exception handling @@ -261,19 +277,12 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, // this is root, can use reduce as most recent point size_down_in = size_up_out = size_up_reduce; } - // check if we finished the job of message passing - size_t nfinished = size_down_in; // can pass message down to childs for (int i = 0; i < nlink; ++i) { - if (i != parent_index) { - if (selecter.CheckWrite(links[i].sock)) { - if (!links[i].WriteFromArray(sendrecvbuf, size_down_in)) return kSockError; - } - nfinished = std::min(links[i].size_write, nfinished); + if (i != parent_index && selecter.CheckWrite(links[i].sock)) { + if (!links[i].WriteFromArray(sendrecvbuf, size_down_in)) return kSockError; } } - // check boundary condition - if (nfinished >= total_size) break; } return kSuccess; } @@ -288,6 +297,7 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, AllReduceBase::ReturnType AllReduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { if (links.size() == 0 || total_size == 0) return kSuccess; + utils::Check(root < world_size, "Broadcast: root should be smaller than world size"); // number of links const int nlink = static_cast(links.size()); // size of space already read from data @@ -306,13 +316,25 @@ AllReduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { } // while we have not passed the messages out while(true) { + bool finished = true; // select helper utils::SelectHelper selecter; - for (size_t i = 0; i < links.size(); ++i) { - 
selecter.WatchRead(links[i].sock); - selecter.WatchWrite(links[i].sock); + for (int i = 0; i < nlink; ++i) { + if (in_link == -2) { + selecter.WatchRead(links[i].sock); finished = false; + } + if (i == in_link && links[i].size_read != total_size) { + selecter.WatchRead(links[i].sock); finished = false; + } + if (in_link != -2 && i != in_link && links[i].size_write != total_size) { + selecter.WatchWrite(links[i].sock); finished = false; + } selecter.WatchException(links[i].sock); } + // finish running + if (finished) break; + // select + selecter.Select(); // exception handling for (int i = 0; i < nlink; ++i) { // recive OOB message from some link @@ -336,18 +358,12 @@ AllReduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { size_in = links[in_link].size_read; } } - size_t nfinished = total_size; // send data to all out-link for (int i = 0; i < nlink; ++i) { - if (i != in_link) { - if (selecter.CheckWrite(links[i].sock)) { - if (!links[i].WriteFromArray(sendrecvbuf_, size_in)) return kSockError; - } - nfinished = std::min(nfinished, links[i].size_write); + if (i != in_link && selecter.CheckWrite(links[i].sock)) { + if (!links[i].WriteFromArray(sendrecvbuf_, size_in)) return kSockError; } } - // check boundary condition - if (nfinished >= total_size) break; } return kSuccess; } diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 7f510d2f3..9f03bea5e 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -366,10 +366,10 @@ AllReduceRobust::TryRecoverData(RecoverType role, // do not need to provide data or receive data, directly exit if (!req_data) return kSuccess; } + utils::Assert(recv_link >= 0 || role == kHaveData, "recv_link must be active"); for (int i = 0; i < nlink; ++i) { links[i].ResetSize(); } - utils::Assert(recv_link >= 0 || role == kHaveData, "recv_link must be active"); if (role == kPassData) { links[recv_link].InitBuffer(1, size, reduce_buffer_size); } diff --git a/test/test_allreduce.cpp 
b/test/test_allreduce.cpp index 3a2cc2a9d..02cb4057f 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -70,6 +70,7 @@ int main(int argc, char *argv[]) { int n = atoi(argv[1]); sync::Init(argc, argv); int rank = sync::GetRank(); + int nproc = sync::GetWorldSize(); std::string name = sync::GetProcessorName(); test::Mock mock(rank, argv[2], argv[3]); @@ -79,6 +80,10 @@ int main(int argc, char *argv[]) { utils::LogPrintf("[%d] !!!TestMax pass\n", rank); TestSum(mock, n); utils::LogPrintf("[%d] !!!TestSum pass\n", rank); + for (int i = 0; i < nproc; i += nproc / 3) { + TestBcast(mock, n, i); + } + utils::LogPrintf("[%d] !!!TestBcast pass\n", rank); sync::Finalize(); printf("[%d] all check pass\n", rank); return 0; From 9355f5faf21a9c7ae44f1a9ecbb3332a5a8d8cd2 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 21:39:22 -0800 Subject: [PATCH 027/531] more conservative exception watching --- src/engine_base.cc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/engine_base.cc b/src/engine_base.cc index 00ac1cffb..eec2330fc 100644 --- a/src/engine_base.cc +++ b/src/engine_base.cc @@ -174,8 +174,7 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, // if no childs, no need to reduce if (nlink == static_cast(parent_index != -1)) { size_up_reduce = total_size; - } - + } // while we have not passed the messages out while (true) { // select helper @@ -184,7 +183,10 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, for (int i = 0; i < nlink; ++i) { if (i == parent_index) { if (size_down_in != total_size) { - selecter.WatchRead(links[i].sock); finished = false; + selecter.WatchRead(links[i].sock); + // only watch for exception in live channels + selecter.WatchException(links[i].sock); + finished = false; } if (size_up_out != total_size) { selecter.WatchWrite(links[i].sock); @@ -193,11 +195,15 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, if (links[i].size_read != total_size) { selecter.WatchRead(links[i].sock); 
} + // size_write <= size_read if (links[i].size_write != total_size) { - selecter.WatchWrite(links[i].sock); finished = false; + selecter.WatchWrite(links[i].sock); + // only watch for exception in live channels + selecter.WatchException(links[i].sock); + finished = false; } } - selecter.WatchException(links[i].sock); + } // finish runing allreduce if (finished) break; From 16f729115e0a4e2c04f0beaec6b79296d4893f0e Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 30 Nov 2014 22:41:04 -0800 Subject: [PATCH 028/531] checkin allreduce recover --- src/engine_robust.cc | 72 ++++++++++++++++++++++++++++++++------------ src/engine_robust.h | 25 +++++++++++++-- 2 files changed, 74 insertions(+), 23 deletions(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 9f03bea5e..6b820f98a 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -13,6 +13,9 @@ #include "./engine_robust.h" namespace engine { +AllReduceRobust::AllReduceRobust(void) { + result_buffer_round = 1; +} /*! * \brief perform in-place allreduce, on sendrecvbuf * this function is NOT thread-safe @@ -23,18 +26,29 @@ namespace engine { */ void AllReduceRobust::AllReduce(void *sendrecvbuf_, size_t type_nbytes, - size_t count, + size_t count, ReduceFunction reducer) { - while (true) { - ReturnType ret = TryAllReduce(sendrecvbuf_, type_nbytes, count, reducer); - if (ret == kSuccess) return; - if (ret == kSockError) { - utils::Error("error occur during all reduce\n"); - } - utils::LogPrintf("[%d] receive except signal, start reset link\n", rank); - TryResetLinks(); + bool recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter); + // now we are free to remove the last result, if any + if (resbuf.LastSeqNo() != -1 && + (resbuf.LastSeqNo() % result_buffer_round != rank % result_buffer_round)) { + resbuf.DropLast(); } - // TODO + void *temp = resbuf.AllocTemp(type_nbytes, count); + while (true) { + if (recovered) { + std::memcpy(temp, sendrecvbuf_, type_nbytes * count); break; + } 
else { + std::memcpy(temp, sendrecvbuf_, type_nbytes * count); + if (CheckAndRecover(TryAllReduce(temp, type_nbytes, count, reducer))) { + std::memcpy(sendrecvbuf_, temp, type_nbytes * count); break; + } else { + recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter); + } + } + } + resbuf.PushTemp(seq_counter, type_nbytes, count); + seq_counter += 1; } /*! * \brief broadcast data from root to all nodes @@ -329,7 +343,6 @@ AllReduceRobust::TryDecideRouting(AllReduceRobust::RecoverType role, *p_recvlink = best_link; return kSuccess; } - /*! * \brief try to finish the data recovery request, * this function is used together with TryDecideRouting @@ -417,7 +430,7 @@ AllReduceRobust::TryRecoverData(RecoverType role, if (req_in[i]) min_write = std::min(links[i].size_write, min_write); } utils::Assert(min_write <= links[pid].size_read, "boundary check"); - if (!links[pid].ReadToRingBuffer(min_write)) return kSockError; + if (!links[pid].ReadToRingBuffer(min_write)) return kSockError; } for (int i = 0; i < nlink; ++i) { if (req_in[i] && selecter.CheckWrite(links[i].sock)) { @@ -438,7 +451,7 @@ AllReduceRobust::TryRecoverData(RecoverType role, } /*! * \brief try to load check point - * + * * This is a collaborative function called by all nodes * only the nodes with requester set to true really needs to load the check point * other nodes acts as collaborative roles to complete this request @@ -448,8 +461,17 @@ AllReduceRobust::TryRecoverData(RecoverType role, * \sa ReturnType */ AllReduceRobust::ReturnType AllReduceRobust::TryLoadCheckPoint(bool requester) { - - return kSuccess; + RecoverType role = requester ? 
kRequestData : kHaveData; + size_t size = this->checked_model.length(); + int recv_link; + std::vector req_in; + ReturnType succ = TryDecideRouting(role, &size, &recv_link, &req_in); + if (succ != kSuccess) return succ; + if (role == kRequestData) { + checked_model.resize(size); + } + utils::Check(size != 0, "zero size check point is not allowed"); + return TryRecoverData(role, &checked_model[0], size, recv_link, req_in); } /*! * \brief try to get the result of operation specified by seqno @@ -458,17 +480,27 @@ AllReduceRobust::ReturnType AllReduceRobust::TryLoadCheckPoint(bool requester) { * only the nodes with requester set to true really needs to get the result * other nodes acts as collaborative roles to complete this request * - * \param buf the buffer to store the result, this parameter is only use when current node is requester - * \param size the total size of the buffer, this parameter is only use when current node is requester + * \param buf the buffer to store the result, this parameter is only used when current node is requester + * \param size the total size of the buffer, this parameter is only used when current node is requester * \param seqno sequence number of the operation, this is unique index of a operation in current iteration * \param requester whether current node is the requester * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details * \sa ReturnType */ AllReduceRobust::ReturnType -AllReduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { - utils::Error("TryGetResult: not implemented"); - return kSuccess; +AllReduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { RecoverType role; + if (!requester) { + sendrecvbuf = resbuf.Query(seqno, &size); + role = sendrecvbuf != NULL ? 
kHaveData : kPassData; + } else { + role = kRequestData; + } + int recv_link; + std::vector req_in; + ReturnType succ = TryDecideRouting(role, &size, &recv_link, &req_in); + if (succ != kSuccess) return succ; + utils::Check(size != 0, "zero size check point is not allowed"); + return TryRecoverData(role, sendrecvbuf, size, recv_link, req_in); } /*! * \brief try to run recover execution for a request action described by flag and seqno, diff --git a/src/engine_robust.h b/src/engine_robust.h index be9cf0998..92febdd70 100644 --- a/src/engine_robust.h +++ b/src/engine_robust.h @@ -16,7 +16,8 @@ namespace engine { /*! \brief implementation of fault tolerant all reduce engine */ class AllReduceRobust : public AllReduceBase { - public: + public: + AllReduceRobust(void); virtual ~AllReduceRobust(void) {} /*! * \brief perform in-place allreduce, on sendrecvbuf @@ -178,6 +179,19 @@ class AllReduceRobust : public AllReduceBase { if (idx == seqno_.size() || seqno_[idx] != seqid) return NULL; *p_size = size_[idx]; return BeginPtr(data_) + rptr_[idx]; + } + // drop last stored result + inline void DropLast(void) { + utils::Assert(seqno_.size() != 0, "there is nothing to be dropped"); + seqno_.pop_back(); + rptr_.pop_back(); + size_.pop_back(); + data_.resize(rptr_.back()); + } + // the sequence number of last stored result + inline int LastSeqNo(void) const { + if (seqno_.size() == 0) return -1; + return seqno_.back(); } private: // sequence number of each @@ -248,8 +262,8 @@ class AllReduceRobust : public AllReduceBase { * only the nodes with requester set to true really needs to get the result * other nodes acts as collaborative roles to complete this request * - * \param buf the buffer to store the result, this parameter is only use when current node is requester - * \param size the total size of the buffer, this parameter is only use when current node is requester + * \param buf the buffer to store the result, this parameter is only used when current node is requester + * 
\param size the total size of the buffer, this parameter is only used when current node is requester * \param seqno sequence number of the operation, this is unique index of a operation in current iteration * \param requester whether current node is the requester * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details @@ -325,8 +339,13 @@ class AllReduceRobust : public AllReduceBase { // call sequence counter, records how many calls we made so far // from last call to CheckPoint, LoadCheckPoint int seq_counter; + // the round of result buffer, used to mode the result + int result_buffer_round; // result buffer ResultBuffer resbuf; + // last check point model + std::string checked_model; + }; } // namespace engine // implementation of inline template function From eb2ca06d6753c5b0b30f202496a88afc204a998a Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Dec 2014 09:17:05 -0800 Subject: [PATCH 029/531] fresh name fresh start --- README.md | 12 +++++++++--- src/config.h | 9 +++++---- src/engine.cc | 2 ++ src/engine.h | 12 +++++++----- src/engine_base.cc | 4 +++- src/engine_base.h | 9 +++++---- src/engine_robust-inl.h | 8 +++++--- src/engine_robust.cc | 3 +++ src/engine_robust.h | 8 +++++--- src/io.h | 6 ++++-- src/mock.h | 23 ++++++++++++----------- src/{allreduce.h => rabit.h} | 14 +++++++------- src/socket.h | 8 +++++--- src/utils.h | 16 +++++++++------- test/test_allreduce.cpp | 25 ++++++++++++------------- 15 files changed, 93 insertions(+), 66 deletions(-) rename src/{allreduce.h => rabit.h} (95%) diff --git a/README.md b/README.md index d4fa97339..17b556f03 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ -AllReduce Abstraction +rabit: Robust Allreduce and Broadcast Interface ==== -* Tianqi, Nacho, Tianyi +rabit is a light weight library designed to provide fault tolerant interface of Allreduce and Broadcast. -Go! 
+Contributors: https://github.com/tqchen/rabit/graphs/contributors + +Design Goal +==== +* rabit should run fast +* rabit is light weight +* rabit dig safe burrows to avoid disasters diff --git a/src/config.h b/src/config.h index 45da45bdb..146948adc 100644 --- a/src/config.h +++ b/src/config.h @@ -1,5 +1,5 @@ -#ifndef ALLREDUCE_UTILS_CONFIG_H_ -#define ALLREDUCE_UTILS_CONFIG_H_ +#ifndef RABIT_UTILS_CONFIG_H_ +#define RABIT_UTILS_CONFIG_H_ /*! * \file config.h * \brief helper class to load in configures from file @@ -12,6 +12,7 @@ #include #include "./utils.h" +namespace rabit { namespace utils { /*! * \brief base implementation of config reader @@ -191,5 +192,5 @@ class ConfigIterator: public ConfigStreamReader { std::ifstream fi; }; } // namespace utils - -#endif // ALLREDUCE_UTILS_CONFIG_H_ +} // namespace rabit +#endif // RABIT_UTILS_CONFIG_H_ diff --git a/src/engine.cc b/src/engine.cc index 375f8e770..de58f4c4e 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -13,6 +13,7 @@ #include "./engine_base.h" #include "./engine_robust.h" +namespace rabit { namespace engine { // singleton sync manager AllReduceRobust manager; @@ -37,3 +38,4 @@ IEngine *GetEngine(void) { return &manager; } } // namespace engine +} // namespace rabit diff --git a/src/engine.h b/src/engine.h index d3493945f..aede4ac74 100644 --- a/src/engine.h +++ b/src/engine.h @@ -3,17 +3,18 @@ * \brief This file defines the core interface of allreduce library * \author Tianqi Chen, Nacho, Tianyi */ -#ifndef ALLREDUCE_ENGINE_H -#define ALLREDUCE_ENGINE_H +#ifndef RABIT_ENGINE_H +#define RABIT_ENGINE_H #include "./io.h" - namespace MPI { /*! \brief MPI data type just to be compatible with MPI reduce function*/ class Datatype; } -/*! \brief namespace of allreduce functionality */ +/*! \brief namespace of rabit */ +namespace rabit { +/*! \brief core interface of engine */ namespace engine { /*! 
\brief interface of core AllReduce engine */ class IEngine { @@ -79,4 +80,5 @@ void Finalize(void); IEngine *GetEngine(void); } // namespace engine -#endif // ALLREDUCE_ENGINE_H +} // namespace rabit +#endif // RABIT_ENGINE_H diff --git a/src/engine_base.cc b/src/engine_base.cc index eec2330fc..4e9a65229 100644 --- a/src/engine_base.cc +++ b/src/engine_base.cc @@ -10,6 +10,7 @@ #include #include "./engine_base.h" +namespace rabit { namespace engine { // constructor AllReduceBase::AllReduceBase(void) { @@ -373,4 +374,5 @@ AllReduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { } return kSuccess; } -} // namespace engine +} // namespace engine +} // namespace rabit diff --git a/src/engine_base.h b/src/engine_base.h index 582cf5e17..2fd5a761b 100644 --- a/src/engine_base.h +++ b/src/engine_base.h @@ -8,8 +8,8 @@ * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ -#ifndef ALLREDUCE_ENGINE_BASE_H -#define ALLREDUCE_ENGINE_BASE_H +#ifndef RABIT_ENGINE_BASE_H +#define RABIT_ENGINE_BASE_H #include #include @@ -25,7 +25,7 @@ class Datatype { Datatype(size_t type_size) : type_size(type_size) {} }; } - +namespace rabit { namespace engine { /*! \brief implementation of basic AllReduce engine */ class AllReduceBase : public IEngine { @@ -246,4 +246,5 @@ class AllReduceBase : public IEngine { int world_size; }; } // namespace engine -#endif // ALLREDUCE_ENGINE_BASE_H +} // namespace rabit +#endif // RABIT_ENGINE_BASE_H diff --git a/src/engine_robust-inl.h b/src/engine_robust-inl.h index 42558a750..1eae685cc 100644 --- a/src/engine_robust-inl.h +++ b/src/engine_robust-inl.h @@ -4,9 +4,10 @@ * * \author Tianqi Chen */ -#ifndef ALLREDUCE_ENGINE_ROBUST_INL_H -#define ALLREDUCE_ENGINE_ROBUST_INL_H +#ifndef RABIT_ENGINE_ROBUST_INL_H +#define RABIT_ENGINE_ROBUST_INL_H +namespace rabit { namespace engine { /*! 
* \brief run message passing algorithm on the allreduce tree @@ -147,4 +148,5 @@ AllReduceRobust::MsgPassing(const NodeType &node_value, return kSuccess; } } // namespace engine -#endif // ALLREDUCE_ENGINE_ROBUST_INL_H +} // namespace rabit +#endif // RABIT_ENGINE_ROBUST_INL_H diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 6b820f98a..cd393f445 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -12,6 +12,7 @@ #include "./utils.h" #include "./engine_robust.h" +namespace rabit { namespace engine { AllReduceRobust::AllReduceRobust(void) { result_buffer_round = 1; @@ -589,3 +590,5 @@ bool AllReduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { return true; } } // namespace engine +} // namespace rabit + diff --git a/src/engine_robust.h b/src/engine_robust.h index 92febdd70..0dbf31852 100644 --- a/src/engine_robust.h +++ b/src/engine_robust.h @@ -7,12 +7,13 @@ * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ -#ifndef ALLREDUCE_ENGINE_ROBUST_H -#define ALLREDUCE_ENGINE_ROBUST_H +#ifndef RABIT_ENGINE_ROBUST_H +#define RABIT_ENGINE_ROBUST_H #include #include "./engine.h" #include "./engine_base.h" +namespace rabit { namespace engine { /*! \brief implementation of fault tolerant all reduce engine */ class AllReduceRobust : public AllReduceBase { @@ -348,7 +349,8 @@ class AllReduceRobust : public AllReduceBase { }; } // namespace engine +} // namespace rabit // implementation of inline template function #include "./engine_robust-inl.h" -#endif // ALLREDUCE_ENGINE_ROBUST_H +#endif // RABIT_ENGINE_ROBUST_H diff --git a/src/io.h b/src/io.h index 97a33f163..913acaa9a 100644 --- a/src/io.h +++ b/src/io.h @@ -1,5 +1,5 @@ -#ifndef ALLREDUCE_UTILS_IO_H -#define ALLREDUCE_UTILS_IO_H +#ifndef RABIT_UTILS_IO_H +#define RABIT_UTILS_IO_H #include #include #include @@ -10,6 +10,7 @@ * \brief general stream interface for serialization, I/O * \author Tianqi Chen */ +namespace rabit { namespace utils { /*! 
* \brief interface of stream I/O, used to serialize model @@ -211,4 +212,5 @@ class FileStream : public ISeekStream { FILE *fp; }; } // namespace utils +} // namespace rabit #endif diff --git a/src/mock.h b/src/mock.h index d6afd49c6..1dd004c8b 100644 --- a/src/mock.h +++ b/src/mock.h @@ -1,17 +1,17 @@ -#ifndef ALLREDUCE_MOCK_H -#define ALLREDUCE_MOCK_H +#ifndef RABIT_MOCK_H +#define RABIT_MOCK_H /*! * \file mock.h * \brief This file defines a mock object to test the system - * \author Tianqi Chen, Nacho, Tianyi + * \author Ignacio Cano */ -#include "./allreduce.h" +#include "./rabit.h" #include "./config.h" #include #include #include - +namespace rabit { /*! \brief namespace of mock */ namespace test { @@ -27,22 +27,22 @@ public: template inline void AllReduce(float *sendrecvbuf, size_t count) { utils::Assert(verify(allReduce), "[%d] error when calling allReduce", rank); - sync::AllReduce(sendrecvbuf, count); + rabit::AllReduce(sendrecvbuf, count); } inline bool LoadCheckPoint(utils::ISerializable *p_model) { utils::Assert(verify(loadCheckpoint), "[%d] error when loading checkpoint", rank); - return sync::LoadCheckPoint(p_model); + return rabit::LoadCheckPoint(p_model); } inline void CheckPoint(const utils::ISerializable &model) { utils::Assert(verify(checkpoint), "[%d] error when checkpointing", rank); - sync::CheckPoint(model); + rabit::CheckPoint(model); } inline void Broadcast(std::string *sendrecv_data, int root) { utils::Assert(verify(broadcast), "[%d] error when broadcasting", rank); - sync::Bcast(sendrecv_data, root); + rabit::Bcast(sendrecv_data, root); } @@ -110,6 +110,7 @@ private: }; -} +} // namespace test +} // namespace rabit -#endif // ALLREDUCE_MOCK_H +#endif // RABIT_MOCK_H diff --git a/src/allreduce.h b/src/rabit.h similarity index 95% rename from src/allreduce.h rename to src/rabit.h index 3f389a591..635e3ff87 100644 --- a/src/allreduce.h +++ b/src/rabit.h @@ -1,7 +1,7 @@ -#ifndef ALLREDUCE_H -#define ALLREDUCE_H +#ifndef RABIT_RABIT_H +#define 
RABIT_RABIT_H /*! - * \file allreduce.h + * \file rabit.h * \brief This file defines a template wrapper of engine to give more flexible * AllReduce operations * @@ -9,8 +9,8 @@ */ #include "./engine.h" -/*! \brief namespace of all reduce */ -namespace sync { +/*! \brief namespace of rabit */ +namespace rabit { /*! \brief namespace of operator */ namespace op { struct Max { @@ -109,5 +109,5 @@ inline bool LoadCheckPoint(utils::ISerializable *p_model) { inline void CheckPoint(const utils::ISerializable &model) { engine::GetEngine()->CheckPoint(model); } -} // namespace allreduce -#endif // ALLREDUCE_H +} // namespace rabit +#endif // RABIT_ALLREDUCE_H diff --git a/src/socket.h b/src/socket.h index 8f6d969e6..296b8aeea 100644 --- a/src/socket.h +++ b/src/socket.h @@ -1,5 +1,5 @@ -#ifndef ALLREDUCE_SOCKET_H -#define ALLREDUCE_SOCKET_H +#ifndef RABIT_SOCKET_H +#define RABIT_SOCKET_H /*! * \file socket.h * \brief this file aims to provide a wrapper of sockets @@ -32,6 +32,7 @@ typedef size_t sock_size_t; const int INVALID_SOCKET = -1; #endif +namespace rabit { namespace utils { /*! \brief data structure for network address */ struct SockAddr { @@ -432,5 +433,6 @@ struct SelectHelper { SOCKET maxfd; fd_set read_set, write_set, except_set; }; -} +} // namespace utils +} // namespace rabit #endif diff --git a/src/utils.h b/src/utils.h index a371d6059..d09667d89 100644 --- a/src/utils.h +++ b/src/utils.h @@ -1,5 +1,5 @@ -#ifndef ALLREDUCE_UTILS_H_ -#define ALLREDUCE_UTILS_H_ +#ifndef RABIT_UTILS_H_ +#define RABIT_UTILS_H_ /*! * \file utils.h * \brief simple utils to support the code @@ -11,7 +11,7 @@ #include #include -#ifndef ALLREDUCE_STRICT_CXX98_ +#ifndef RABIT_STRICT_CXX98_ #include #endif @@ -50,13 +50,14 @@ typedef long int64_t; #include #endif +namespace rabit { /*! \brief namespace for helper utils of the project */ namespace utils { /*! 
\brief error message buffer length */ const int kPrintBuffer = 1 << 12; -#ifndef ALLREDUCE_CUSTOMIZE_MSG_ +#ifndef RABIT_CUSTOMIZE_MSG_ /*! * \brief handling of Assert error, caused by in-apropriate input * \param msg error message @@ -81,14 +82,14 @@ inline void HandleLogPrint(const char *msg) { fflush(stderr); } #else -#ifndef ALLREDUCE_STRICT_CXX98_ +#ifndef RABIT_STRICT_CXX98_ // include declarations, some one must implement this void HandleAssertError(const char *msg); void HandleCheckError(const char *msg); void HandlePrint(const char *msg); #endif #endif -#ifdef ALLREDUCE_STRICT_CXX98_ +#ifdef RABIT_STRICT_CXX98_ // these function pointers are to be assigned extern "C" void (*Printf)(const char *fmt, ...); extern "C" int (*SPrintf)(char *buf, size_t size, const char *fmt, ...); @@ -186,4 +187,5 @@ inline const T *BeginPtr(const std::vector &vec) { return &vec[0]; } } -#endif // ALLREDUCE_UTILS_H_ +} // namespace rabit +#endif // RABIT_UTILS_H_ diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index 02cb4057f..7f9ad9f78 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -1,16 +1,15 @@ -#include +#include #include #include #include #include #include - -using namespace sync; +using namespace rabit; inline void TestMax(test::Mock &mock, size_t n) { - int rank = sync::GetRank(); - int nproc = sync::GetWorldSize(); + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); std::vector ndata(n); for (size_t i = 0; i < ndata.size(); ++i) { @@ -27,8 +26,8 @@ inline void TestMax(test::Mock &mock, size_t n) { } inline void TestSum(test::Mock &mock, size_t n) { - int rank = sync::GetRank(); - int nproc = sync::GetWorldSize(); + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); const int z = 131; std::vector ndata(n); @@ -47,7 +46,7 @@ inline void TestSum(test::Mock &mock, size_t n) { } inline void TestBcast(test::Mock &mock, size_t n, int root) { - int rank = sync::GetRank(); + int rank = rabit::GetRank(); 
std::string s; s.resize(n); for (size_t i = 0; i < n; ++i) { s[i] = char(i % 126 + 1); @@ -68,10 +67,10 @@ int main(int argc, char *argv[]) { return 0; } int n = atoi(argv[1]); - sync::Init(argc, argv); - int rank = sync::GetRank(); - int nproc = sync::GetWorldSize(); - std::string name = sync::GetProcessorName(); + rabit::Init(argc, argv); + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + std::string name = rabit::GetProcessorName(); test::Mock mock(rank, argv[2], argv[3]); @@ -84,7 +83,7 @@ int main(int argc, char *argv[]) { TestBcast(mock, n, i); } utils::LogPrintf("[%d] !!!TestBcast pass\n", rank); - sync::Finalize(); + rabit::Finalize(); printf("[%d] all check pass\n", rank); return 0; } From 62c8ce96572dab92c4c288b7f44947f2d10557b2 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Dec 2014 10:03:31 -0800 Subject: [PATCH 030/531] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 17b556f03..de09b890b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ -rabit: Robust Allreduce and Broadcast Interface +rabit ==== +#### Robust Allreduce and Broadcast Interface + rabit is a light weight library designed to provide fault tolerant interface of Allreduce and Broadcast. Contributors: https://github.com/tqchen/rabit/graphs/contributors From b5367f48f6a5a310fb46ec5cdf0799d16b3dfbef Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Dec 2014 10:03:45 -0800 Subject: [PATCH 031/531] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index de09b890b..57449879f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ rabit ==== -#### Robust Allreduce and Broadcast Interface +## Robust Allreduce and Broadcast Interface rabit is a light weight library designed to provide fault tolerant interface of Allreduce and Broadcast. 
From 0d636460156b7bc4b262ba1a84c02df2678e6b0f Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Dec 2014 10:04:10 -0800 Subject: [PATCH 032/531] Update README.md --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 57449879f..937edd284 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,4 @@ -rabit -==== -## Robust Allreduce and Broadcast Interface +## rabit: Robust Allreduce and Broadcast Interface rabit is a light weight library designed to provide fault tolerant interface of Allreduce and Broadcast. From 1c5167d96e9c313c0c2e08993aefb00bb1e60da6 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Dec 2014 10:32:30 -0800 Subject: [PATCH 033/531] rabit seems ready to run --- src/engine_robust.cc | 59 +++++++++++++++++++++++++++++++++++++++----- src/engine_robust.h | 2 +- src/io.h | 1 + 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index cd393f445..44497bfbe 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -9,6 +9,7 @@ #define NOMINMAX #include #include +#include "./io.h" #include "./utils.h" #include "./engine_robust.h" @@ -16,6 +17,7 @@ namespace rabit { namespace engine { AllReduceRobust::AllReduceRobust(void) { result_buffer_round = 1; + seq_counter = 0; } /*! 
* \brief perform in-place allreduce, on sendrecvbuf @@ -58,9 +60,26 @@ void AllReduceRobust::AllReduce(void *sendrecvbuf_, * \param root the root worker id to broadcast the data */ void AllReduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) { - utils::Assert(TryBroadcast(sendrecvbuf_, total_size, root) == kSuccess, - "AllReduce failed"); - // TODO + bool recovered = RecoverExec(sendrecvbuf_, total_size, 0, seq_counter); + // now we are free to remove the last result, if any + if (resbuf.LastSeqNo() != -1 && + (resbuf.LastSeqNo() % result_buffer_round != rank % result_buffer_round)) { + resbuf.DropLast(); + } + void *temp = resbuf.AllocTemp(1, total_size); + while (true) { + if (recovered) { + std::memcpy(temp, sendrecvbuf_, total_size); break; + } else { + if (CheckAndRecover(TryBroadcast(sendrecvbuf_, total_size, root))) { + std::memcpy(temp, sendrecvbuf_, total_size); break; + } else { + recovered = RecoverExec(sendrecvbuf_, total_size, 0, seq_counter); + } + } + } + resbuf.PushTemp(seq_counter, 1, total_size); + seq_counter += 1; } /*! 
* \brief load latest check point @@ -69,15 +88,43 @@ void AllReduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) * false if there was no stored checkpoint, means we are start over gain */ bool AllReduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { - // TODO - return false; + // check if we succesfll + if (RecoverExec(NULL, 0, ActionSummary::kLoadCheck, ActionSummary::kMaxSeq)) { + // if loaded model is empty, this simply means we did not call checkpoint yet + // ask caller to reinit model + if (checked_model.length() == 0) return false; + // load from buffer + utils::MemoryBufferStream fs(&checked_model); + p_model->Load(fs); + // reset result buffer + resbuf.Clear(); seq_counter = 0; + // run another phase of check ack, if recovered from data + utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kMaxSeq), + "check ack must return true"); + return true; + } else { + // nothing loaded, a fresh start, everyone init model + return false; + } } /*! * \brief checkpoint the model, meaning we finished a stage of execution * \param p_model pointer to the model */ void AllReduceRobust::CheckPoint(const utils::ISerializable &model) { - // TODO + // save model + checked_model.resize(0); + utils::MemoryBufferStream fs(&checked_model); + model.Save(fs); + utils::Check(checked_model.length() != 0, "CheckPoint: empty model, model.Save must save something"); + // execute checkpoint, note: when checkpoint existing, load will not happen + utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kMaxSeq), + "check point must return true"); + // reset result buffer + resbuf.Clear(); seq_counter = 0; + // execute check ack step, load happens here + utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kMaxSeq), + "check ack must return true"); } /*! 
* \brief reset the all the existing links by sending Out-of-Band message marker diff --git a/src/engine_robust.h b/src/engine_robust.h index 0dbf31852..703a54469 100644 --- a/src/engine_robust.h +++ b/src/engine_robust.h @@ -154,7 +154,7 @@ class AllReduceRobust : public AllReduceBase { rptr_.clear(); rptr_.push_back(0); data_.clear(); } - // allocate temporal space for + // allocate temporal space inline void *AllocTemp(size_t type_nbytes, size_t count) { size_t size = type_nbytes * count; size_t nhop = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); diff --git a/src/io.h b/src/io.h index 913acaa9a..ed01545f2 100644 --- a/src/io.h +++ b/src/io.h @@ -91,6 +91,7 @@ class IStream { /*! \brief interface of se*/ class ISerializable { + public: /*! \brief load the model from file */ virtual void Load(IStream &fi) = 0; /*! \brief save the model to the stream*/ From fd2c57b8a4741fab8c07d1b9888dd4f5ba44cf1e Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 1 Dec 2014 15:32:57 -0800 Subject: [PATCH 034/531] Update engine_robust.cc --- src/engine_robust.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 44497bfbe..be950aa98 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -303,7 +303,8 @@ ShortestDist(const std::pair &node_value, res = dist_in[i].first; size = dist_in[i].second; } } - return std::make_pair(res, size); + // add one hop + return std::make_pair(res + 1, size); } /*! 
* \brief message passing function, used to decide the From 337840d29bcefd6eabcd78f65042c5b20632f28e Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Dec 2014 16:57:26 -0800 Subject: [PATCH 035/531] recover not yet working --- src/engine.h | 10 +++- src/engine_base.h | 10 +++- src/engine_robust.h | 8 +++ test/Makefile | 3 +- test/test.sh | 2 +- test/test_recover.cpp | 126 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 154 insertions(+), 5 deletions(-) create mode 100644 test/test_recover.cpp diff --git a/src/engine.h b/src/engine.h index aede4ac74..807f7c6ad 100644 --- a/src/engine.h +++ b/src/engine.h @@ -52,7 +52,13 @@ class IEngine { * \param root the root worker id to broadcast the data */ virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) = 0; - /*! + /*! + * \brief explicitly re-init everything before calling LoadCheckPoint + * call this function when IEngine throw an exception out, + * this function is only used for test purpose + */ + virtual void InitAfterException(void) = 0; + /*! * \brief load latest check point * \param p_model pointer to the model * \return true if there was stored checkpoint and load was successful @@ -63,7 +69,7 @@ class IEngine { * \brief checkpoint the model, meaning we finished a stage of execution * \param p_model pointer to the model */ - virtual void CheckPoint(const utils::ISerializable &model) = 0; + virtual void CheckPoint(const utils::ISerializable &model) = 0; /*! \brief get rank of current node */ virtual int GetRank(void) const = 0; /*! \brief get total number of */ diff --git a/src/engine_base.h b/src/engine_base.h index 2fd5a761b..0cc281cff 100644 --- a/src/engine_base.h +++ b/src/engine_base.h @@ -93,7 +93,15 @@ class AllReduceBase : public IEngine { */ virtual void CheckPoint(const utils::ISerializable &model) { } - + /*! 
+ * \brief explicitly re-init everything before calling LoadCheckPoint + * call this function when IEngine throw an exception out, + * this function is only used for test purpose + */ + virtual void InitAfterException(void) { + utils::Error("InitAfterException: not implemented"); + } + protected: /*! \brief enumeration of possible returning results from Try functions */ enum ReturnType { diff --git a/src/engine_robust.h b/src/engine_robust.h index 703a54469..783b2deb7 100644 --- a/src/engine_robust.h +++ b/src/engine_robust.h @@ -51,6 +51,14 @@ class AllReduceRobust : public AllReduceBase { * \param p_model pointer to the model */ virtual void CheckPoint(const utils::ISerializable &model); + /*! + * \brief explicitly re-init everything before calling LoadCheckPoint + * call this function when IEngine throw an exception out, + * this function is only used for test purpose + */ + virtual void InitAfterException(void) { + this->CheckAndRecover(kGetExcept); + } private: // constant one byte out of band message to indicate error happening diff --git a/test/Makefile b/test/Makefile index 49aca06e1..a3f6b07c7 100644 --- a/test/Makefile +++ b/test/Makefile @@ -11,7 +11,7 @@ else endif # specify tensor path -BIN = test_allreduce +BIN = test_allreduce test_recover OBJ = engine_base.o engine_robust.o engine.o .PHONY: clean all @@ -22,6 +22,7 @@ engine_base.o: ../src/engine_base.cc ../src/*.h engine.o: ../src/engine.cc ../src/*.h engine_robust.o: ../src/engine_robust.cc ../src/*.h test_allreduce: test_allreduce.cpp ../src/*.h $(OBJ) +test_recover: test_recover.cpp ../src/*.h $(OBJ) $(BIN) : $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) diff --git a/test/test.sh b/test/test.sh index 5c70404ac..30d6bbca7 100755 --- a/test/test.sh +++ b/test/test.sh @@ -5,4 +5,4 @@ then exit -1 fi -../submit_job_tcp.py $1 test_allreduce $2 $3 $4 \ No newline at end of file +../submit_job_tcp.py $1 test_recover $2 $3 $4 diff --git a/test/test_recover.cpp 
b/test/test_recover.cpp new file mode 100644 index 000000000..81e81c6fa --- /dev/null +++ b/test/test_recover.cpp @@ -0,0 +1,126 @@ +#include +#include +#include +#include +#include +#include + +using namespace rabit; + +struct MockException { +}; + +inline void TestMax(test::Mock &mock, size_t n, int ntrial) { + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + + std::vector ndata(n); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % 111; + } + mock.AllReduce(&ndata[0], ndata.size()); + for (size_t i = 0; i < ndata.size(); ++i) { + float rmax = (i * 1) % 111; + for (int r = 0; r < nproc; ++r) { + rmax = std::max(rmax, (float)((i * (r+1)) % 111)); + } + utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank); + } +} + +inline void TestSum(test::Mock &mock, size_t n, int ntrial) { + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + const int z = 131; + + std::vector ndata(n); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % z; + } + mock.AllReduce(&ndata[0], ndata.size()); + + if (ntrial == 0 && rank == 0) throw MockException(); + + for (size_t i = 0; i < ndata.size(); ++i) { + float rsum = 0.0f; + for (int r = 0; r < nproc; ++r) { + rsum += (float)((i * (r+1)) % z); + } + utils::Check(fabsf(rsum - ndata[i]) < 1e-5 , + "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]); + } +} + +inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { + int rank = rabit::GetRank(); + std::string s; s.resize(n); + for (size_t i = 0; i < n; ++i) { + s[i] = char(i % 126 + 1); + } + std::string res; + if (root == rank) { + res = s; + mock.Broadcast(&res, root); + } else { + mock.Broadcast(&res, root); + } + utils::Check(res == s, "[%d] TestBcast fail", rank); +} +// dummy model +class Model : public rabit::utils::ISerializable { + public: + // iterations + int iter; + // load from stream + virtual void Load(rabit::utils::IStream &fi) { + 
fi.Read(&iter, sizeof(iter)); + } + /*! \brief save the model to the stream */ + virtual void Save(rabit::utils::IStream &fo) const { + fo.Write(&iter, sizeof(iter)); + } + virtual void InitModel(void) { + iter = 0; + } +}; + +int main(int argc, char *argv[]) { + if (argc < 3) { + printf("Usage: \n"); + return 0; + } + int n = atoi(argv[1]); + rabit::Init(argc, argv); + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + std::string name = rabit::GetProcessorName(); + test::Mock mock(rank, argv[2], argv[3]); + Model model; + srand(0); + int ntrial = 0; + while (true) { + try { + if (!rabit::LoadCheckPoint(&model)) { + model.InitModel(); + } + utils::LogPrintf("[%d] start at %s\n", rank, name.c_str()); + TestMax(mock, n, ntrial); + utils::LogPrintf("[%d] !!!TestMax pass\n", rank); + TestSum(mock, n, ntrial); + utils::LogPrintf("[%d] !!!TestSum pass\n", rank); + + for (int i = 0; i < nproc; i += nproc / 3) { + TestBcast(mock, n, i, ntrial); + } + utils::LogPrintf("[%d] !!!TestBcast pass\n", rank); + // reach here + break; + } catch (MockException &e) { + rabit::engine::GetEngine()->InitAfterException(); + ++ntrial; + } + } + rabit::Finalize(); + printf("[%d] all check pass\n", rank); + return 0; +} From 993ff8bb912c75c743a8a9c5dd2fb69c338c9f3b Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Dec 2014 19:34:27 -0800 Subject: [PATCH 036/531] find one bug, continue to next one --- src/engine_robust.cc | 15 ++++++++++----- test/test.sh | 2 +- test/test_recover.cpp | 12 ++++++------ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index be950aa98..099e83934 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -16,7 +16,7 @@ namespace rabit { namespace engine { AllReduceRobust::AllReduceRobust(void) { - result_buffer_round = 1; + result_buffer_round = 2; seq_counter = 0; } /*! 
@@ -32,6 +32,7 @@ void AllReduceRobust::AllReduce(void *sendrecvbuf_, size_t count, ReduceFunction reducer) { bool recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter); + utils::LogPrintf("[%d] AllReduce recovered=%d\n", rank, recovered); // now we are free to remove the last result, if any if (resbuf.LastSeqNo() != -1 && (resbuf.LastSeqNo() % result_buffer_round != rank % result_buffer_round)) { @@ -90,19 +91,21 @@ void AllReduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) bool AllReduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { // check if we succesfll if (RecoverExec(NULL, 0, ActionSummary::kLoadCheck, ActionSummary::kMaxSeq)) { + // reset result buffer + resbuf.Clear(); seq_counter = 0; // if loaded model is empty, this simply means we did not call checkpoint yet // ask caller to reinit model if (checked_model.length() == 0) return false; // load from buffer utils::MemoryBufferStream fs(&checked_model); p_model->Load(fs); - // reset result buffer - resbuf.Clear(); seq_counter = 0; // run another phase of check ack, if recovered from data utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kMaxSeq), "check ack must return true"); return true; } else { + // reset result buffer + resbuf.Clear(); seq_counter = 0; // nothing loaded, a fresh start, everyone init model return false; } @@ -362,7 +365,7 @@ AllReduceRobust::TryDecideRouting(AllReduceRobust::RecoverType role, for (size_t i = 0; i < dist_in.size(); ++i) { if (dist_in[i].first != std::numeric_limits::max()) { utils::Check(best_link == -2 || *p_size == dist_in[i].second, - "AllReduce size inconsistent"); + "AllReduce size inconsistent, size=%lu, reporting=%lu", *p_size, dist_in[i].second); if (best_link == -2 || dist_in[i].first < dist_in[best_link].first) { best_link = static_cast(i); *p_size = dist_in[i].second; @@ -413,6 +416,7 @@ AllReduceRobust::TryRecoverData(RecoverType role, size_t size, int recv_link, const std::vector 
&req_in) { + utils::LogPrintf("[%d] recv_link=%d\n", rank, recv_link); // no need to run recovery for zero size message if (links.size() == 0 || size == 0) return kSuccess; utils::Assert(req_in.size() == links.size(), "TryRecoverData"); @@ -519,7 +523,7 @@ AllReduceRobust::ReturnType AllReduceRobust::TryLoadCheckPoint(bool requester) { if (role == kRequestData) { checked_model.resize(size); } - utils::Check(size != 0, "zero size check point is not allowed"); + if (size == 0) return kSuccess; return TryRecoverData(role, &checked_model[0], size, recv_link, req_in); } /*! @@ -574,6 +578,7 @@ bool AllReduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { } // request ActionSummary req(flag, seqno); + utils::LogPrintf("[%d] propose flag=%d, seq=%d\n", rank, flag, seqno); while (true) { // action ActionSummary act = req; diff --git a/test/test.sh b/test/test.sh index 30d6bbca7..9b27abb8b 100755 --- a/test/test.sh +++ b/test/test.sh @@ -5,4 +5,4 @@ then exit -1 fi -../submit_job_tcp.py $1 test_recover $2 $3 $4 +../submit_job_tcp.py $1 test_recover $2 $3 $4 $5 diff --git a/test/test_recover.cpp b/test/test_recover.cpp index 81e81c6fa..9267cdca5 100644 --- a/test/test_recover.cpp +++ b/test/test_recover.cpp @@ -103,20 +103,20 @@ int main(int argc, char *argv[]) { if (!rabit::LoadCheckPoint(&model)) { model.InitModel(); } - utils::LogPrintf("[%d] start at %s\n", rank, name.c_str()); + utils::LogPrintf("[%d/%d] start at %s\n", rank, ntrial, name.c_str()); TestMax(mock, n, ntrial); - utils::LogPrintf("[%d] !!!TestMax pass\n", rank); + utils::LogPrintf("[%d/%d] !!!TestMax pass\n", rank, ntrial); TestSum(mock, n, ntrial); - utils::LogPrintf("[%d] !!!TestSum pass\n", rank); - - for (int i = 0; i < nproc; i += nproc / 3) { + utils::LogPrintf("[%d/%d] !!!TestSum pass\n", rank, ntrial); + int step = std::max(nproc / 3, 1); + for (int i = 0; i < nproc; i += step) { TestBcast(mock, n, i, ntrial); } utils::LogPrintf("[%d] !!!TestBcast pass\n", rank); // reach here 
break; } catch (MockException &e) { - rabit::engine::GetEngine()->InitAfterException(); + //rabit::engine::GetEngine()->InitAfterException(); ++ntrial; } } From 46b5d46111c4153756687d529f7e95bede17cc86 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Dec 2014 19:53:41 -0800 Subject: [PATCH 037/531] fix one bug, another comes --- src/engine_base.cc | 6 +++++- src/engine_base.h | 6 +++++- src/engine_robust.cc | 24 ++++++++++++++++++------ src/engine_robust.h | 12 +++++++++--- test/test.sh | 4 ++-- 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/src/engine_base.cc b/src/engine_base.cc index 4e9a65229..3b08d1502 100644 --- a/src/engine_base.cc +++ b/src/engine_base.cc @@ -111,7 +111,11 @@ void AllReduceBase::Shutdown(void) { links.clear(); utils::TCPSocket::Finalize(); } -// set the parameters for AllReduce +/*! + * \brief set parameters to the engine + * \param name parameter name + * \param val parameter value + */ void AllReduceBase::SetParam(const char *name, const char *val) { if (!strcmp(name, "master_uri")) master_uri = val; if (!strcmp(name, "master_port")) master_port = atoi(val); diff --git a/src/engine_base.h b/src/engine_base.h index 0cc281cff..9e533fe27 100644 --- a/src/engine_base.h +++ b/src/engine_base.h @@ -39,7 +39,11 @@ class AllReduceBase : public IEngine { void Shutdown(void); // initialize the manager void Init(void); - /*! \brief set parameters to the sync manager */ + /*! + * \brief set parameters to the engine + * \param name parameter name + * \param val parameter value + */ virtual void SetParam(const char *name, const char *val); /*! 
\brief get rank */ virtual int GetRank(void) const { diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 099e83934..9db11bc96 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -19,6 +19,13 @@ AllReduceRobust::AllReduceRobust(void) { result_buffer_round = 2; seq_counter = 0; } +void AllReduceRobust::SetParam(const char *name, const char *val) { + AllReduceBase::SetParam(name, val); + if (!strcmp(name, "result_buffer_round")) result_buffer_round = atoi(val); + if (!strcmp(name, "result_replicate")) { + result_buffer_round = std::max(world_size / atoi(val), 1); + } +} /*! * \brief perform in-place allreduce, on sendrecvbuf * this function is NOT thread-safe @@ -32,7 +39,7 @@ void AllReduceRobust::AllReduce(void *sendrecvbuf_, size_t count, ReduceFunction reducer) { bool recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter); - utils::LogPrintf("[%d] AllReduce recovered=%d\n", rank, recovered); + //utils::LogPrintf("[%d] AllReduce recovered=%d\n", rank, recovered); // now we are free to remove the last result, if any if (resbuf.LastSeqNo() != -1 && (resbuf.LastSeqNo() % result_buffer_round != rank % result_buffer_round)) { @@ -302,12 +309,15 @@ ShortestDist(const std::pair &node_value, int res = std::numeric_limits::max(); for (size_t i = 0; i < dist_in.size(); ++i) { if (i == out_index) continue; - if (dist_in[i].first < res) { - res = dist_in[i].first; size = dist_in[i].second; + if (dist_in[i].first == std::numeric_limits::max()) continue; + if (dist_in[i].first + 1 < res) { + res = dist_in[i].first + 1; + size = dist_in[i].second; } } // add one hop - return std::make_pair(res + 1, size); + + return std::make_pair(res, size); } /*! 
* \brief message passing function, used to decide the @@ -365,7 +375,8 @@ AllReduceRobust::TryDecideRouting(AllReduceRobust::RecoverType role, for (size_t i = 0; i < dist_in.size(); ++i) { if (dist_in[i].first != std::numeric_limits::max()) { utils::Check(best_link == -2 || *p_size == dist_in[i].second, - "AllReduce size inconsistent, size=%lu, reporting=%lu", *p_size, dist_in[i].second); + "[%d] AllReduce size inconsistent, distin=%lu, size=%lu, reporting=%lu\n", + rank, dist_in[i].first, *p_size, dist_in[i].second); if (best_link == -2 || dist_in[i].first < dist_in[best_link].first) { best_link = static_cast(i); *p_size = dist_in[i].second; @@ -416,7 +427,6 @@ AllReduceRobust::TryRecoverData(RecoverType role, size_t size, int recv_link, const std::vector &req_in) { - utils::LogPrintf("[%d] recv_link=%d\n", rank, recv_link); // no need to run recovery for zero size message if (links.size() == 0 || size == 0) return kSuccess; utils::Assert(req_in.size() == links.size(), "TryRecoverData"); @@ -432,6 +442,7 @@ AllReduceRobust::TryRecoverData(RecoverType role, // do not need to provide data or receive data, directly exit if (!req_data) return kSuccess; } + utils::LogPrintf("[%d] !!Need to pass data\n", rank); utils::Assert(recv_link >= 0 || role == kHaveData, "recv_link must be active"); for (int i = 0; i < nlink; ++i) { links[i].ResetSize(); @@ -548,6 +559,7 @@ AllReduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool re } else { role = kRequestData; } + utils::LogPrintf("[%d] role=%d\n", rank, role); int recv_link; std::vector req_in; ReturnType succ = TryDecideRouting(role, &size, &recv_link, &req_in); diff --git a/src/engine_robust.h b/src/engine_robust.h index 783b2deb7..7116764d8 100644 --- a/src/engine_robust.h +++ b/src/engine_robust.h @@ -20,6 +20,12 @@ class AllReduceRobust : public AllReduceBase { public: AllReduceRobust(void); virtual ~AllReduceRobust(void) {} + /*! 
+ * \brief set parameters to the engine + * \param name parameter name + * \param val parameter value + */ + virtual void SetParam(const char *name, const char *val); /*! * \brief perform in-place allreduce, on sendrecvbuf * this function is NOT thread-safe @@ -71,11 +77,11 @@ class AllReduceRobust : public AllReduceBase { /*! \brief type of roles each node can play during recovery */ enum RecoverType { /*! \brief current node have data */ - kHaveData, + kHaveData = 0, /*! \brief current node request data */ - kRequestData, + kRequestData = 1, /*! \brief current node only helps to pass data around */ - kPassData + kPassData = 2 }; /*! * \brief summary of actions proposed in all nodes diff --git a/test/test.sh b/test/test.sh index 9b27abb8b..78d267157 100755 --- a/test/test.sh +++ b/test/test.sh @@ -1,8 +1,8 @@ #!/bin/bash -if [ "$#" -ne 4 ]; +if [ "$#" -lt 4 ]; then echo "Usage " exit -1 fi -../submit_job_tcp.py $1 test_recover $2 $3 $4 $5 +../submit_job_tcp.py $1 test_recover "${@:2}" From b76cd5858cd1ce831679e8b3091d0b871609b032 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Dec 2014 20:18:25 -0800 Subject: [PATCH 038/531] seems ok version --- src/engine_robust.cc | 22 +++++++++------------- test/test_recover.cpp | 1 + 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/engine_robust.cc b/src/engine_robust.cc index 9db11bc96..ab33b0f0a 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -39,7 +39,6 @@ void AllReduceRobust::AllReduce(void *sendrecvbuf_, size_t count, ReduceFunction reducer) { bool recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter); - //utils::LogPrintf("[%d] AllReduce recovered=%d\n", rank, recovered); // now we are free to remove the last result, if any if (resbuf.LastSeqNo() != -1 && (resbuf.LastSeqNo() % result_buffer_round != rank % result_buffer_round)) { @@ -442,14 +441,10 @@ AllReduceRobust::TryRecoverData(RecoverType role, // do not need to provide data or receive data, directly exit 
if (!req_data) return kSuccess; } - utils::LogPrintf("[%d] !!Need to pass data\n", rank); utils::Assert(recv_link >= 0 || role == kHaveData, "recv_link must be active"); for (int i = 0; i < nlink; ++i) { links[i].ResetSize(); } - if (role == kPassData) { - links[recv_link].InitBuffer(1, size, reduce_buffer_size); - } while (true) { bool finished = true; utils::SelectHelper selecter; @@ -457,9 +452,12 @@ AllReduceRobust::TryRecoverData(RecoverType role, if (i == recv_link && links[i].size_read != size) { selecter.WatchRead(links[i].sock); finished = false; - } + } if (req_in[i] && links[i].size_write != size) { - selecter.WatchWrite(links[i].sock); + if (role == kHaveData || + (role == kPassData && links[recv_link].size_read != links[i].size_write)) { + selecter.WatchWrite(links[i].sock); + } finished = false; } selecter.WatchException(links[i].sock); @@ -496,12 +494,12 @@ AllReduceRobust::TryRecoverData(RecoverType role, utils::Assert(min_write <= links[pid].size_read, "boundary check"); if (!links[pid].ReadToRingBuffer(min_write)) return kSockError; } - for (int i = 0; i < nlink; ++i) { - if (req_in[i] && selecter.CheckWrite(links[i].sock)) { + for (int i = 0; i < nlink; ++i) { + if (req_in[i] && selecter.CheckWrite(links[i].sock) && links[pid].size_read != links[i].size_write) { size_t start = links[i].size_write % buffer_size; // send out data from ring buffer - size_t nwrite = std::min(buffer_size - start, links[pid].size_read - links[i].size_write); - ssize_t len = links[pid].sock.Send(links[pid].buffer_head + start, nwrite); + size_t nwrite = std::min(buffer_size - start, links[pid].size_read - links[i].size_write); + ssize_t len = links[i].sock.Send(links[pid].buffer_head + start, nwrite); if (len != -1) { links[i].size_write += len; } else { @@ -559,7 +557,6 @@ AllReduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool re } else { role = kRequestData; } - utils::LogPrintf("[%d] role=%d\n", rank, role); int recv_link; std::vector req_in; 
ReturnType succ = TryDecideRouting(role, &size, &recv_link, &req_in); @@ -590,7 +587,6 @@ bool AllReduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { } // request ActionSummary req(flag, seqno); - utils::LogPrintf("[%d] propose flag=%d, seq=%d\n", rank, flag, seqno); while (true) { // action ActionSummary act = req; diff --git a/test/test_recover.cpp b/test/test_recover.cpp index 9267cdca5..215177f20 100644 --- a/test/test_recover.cpp +++ b/test/test_recover.cpp @@ -19,6 +19,7 @@ inline void TestMax(test::Mock &mock, size_t n, int ntrial) { ndata[i] = (i * (rank+1)) % 111; } mock.AllReduce(&ndata[0], ndata.size()); + if (ntrial == 0 && rank == 15) throw MockException(); for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % 111; for (int r = 0; r < nproc; ++r) { From 255218a2f35ceb0c2ec7157873e5305935d11dc3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Dec 2014 21:39:51 -0800 Subject: [PATCH 039/531] change in interface, seems resetlink is still bad --- src/engine.h | 26 ++++++++++++++++++++++--- src/engine_base.cc | 1 + src/engine_base.h | 32 +++++++++++++++++++++++-------- src/engine_robust.cc | 44 +++++++++++++++++++++++++++++++++---------- src/engine_robust.h | 22 ++++++++++++++-------- src/rabit.h | 30 +++++++++++++++++++++++++---- test/Makefile | 3 ++- test/test_recover.cpp | 10 ++++------ 8 files changed, 128 insertions(+), 40 deletions(-) diff --git a/src/engine.h b/src/engine.h index 807f7c6ad..1c040a9e4 100644 --- a/src/engine.h +++ b/src/engine.h @@ -61,15 +61,35 @@ class IEngine { /*! 
* \brief load latest check point * \param p_model pointer to the model - * \return true if there was stored checkpoint and load was successful - * false if there was no stored checkpoint, means we are start over gain + * \return the version number of check point loaded + * if returned version == 0, this means no model has been CheckPointed + * the p_model is not touched, user should do necessary initialization by themselves + * + * Common usage example: + * int iter = rabit::LoadCheckPoint(&model); + * if (iter == 0) model.InitParameters(); + * for (i = iter; i < max_iter; ++i) { + * do many things, include allreduce + * rabit::CheckPoint(model); + * } + * + * \sa CheckPoint, VersionNumber */ - virtual bool LoadCheckPoint(utils::ISerializable *p_model) = 0; + virtual int LoadCheckPoint(utils::ISerializable *p_model) = 0; /*! * \brief checkpoint the model, meaning we finished a stage of execution + * every time we call check point, there is a version number which will increase by one + * * \param p_model pointer to the model + * \sa LoadCheckPoint, VersionNumber */ virtual void CheckPoint(const utils::ISerializable &model) = 0; + /*! + * \return version number of current stored model, + * which means how many calls to CheckPoint we made so far + * \sa LoadCheckPoint, CheckPoint + */ + virtual int VersionNumber(void) const = 0; /*! \brief get rank of current node */ virtual int GetRank(void) const = 0; /*! 
\brief get total number of */ diff --git a/src/engine_base.cc b/src/engine_base.cc index 3b08d1502..556b71e08 100644 --- a/src/engine_base.cc +++ b/src/engine_base.cc @@ -21,6 +21,7 @@ AllReduceBase::AllReduceBase(void) { nport_trial = 1000; rank = 0; world_size = 1; + version_number = 0; this->SetParam("reduce_buffer", "256MB"); } diff --git a/src/engine_base.h b/src/engine_base.h index 9e533fe27..48d38aeb9 100644 --- a/src/engine_base.h +++ b/src/engine_base.h @@ -35,10 +35,10 @@ class AllReduceBase : public IEngine { // constant one byte out of band message to indicate error happening AllReduceBase(void); virtual ~AllReduceBase(void) {} - // shutdown the engine - void Shutdown(void); // initialize the manager void Init(void); + // shutdown the engine + virtual void Shutdown(void); /*! * \brief set parameters to the engine * \param name parameter name @@ -82,20 +82,34 @@ class AllReduceBase : public IEngine { utils::Assert(TryBroadcast(sendrecvbuf_, total_size, root) == kSuccess, "AllReduce failed"); } - /*! + /*! * \brief load latest check point * \param p_model pointer to the model - * \return true if there was stored checkpoint and load was successful - * false if there was no stored checkpoint, means we are start over gain - */ - virtual bool LoadCheckPoint(utils::ISerializable *p_model) { - return false; + * \return the version number of check point loaded + * if returned version == 0, this means no model has been CheckPointed + * the p_model is not touched, user should do necessary initialization by themselves + * \sa CheckPoint, VersionNumber + */ + virtual int LoadCheckPoint(utils::ISerializable *p_model) { + return 0; } /*! 
* \brief checkpoint the model, meaning we finished a stage of execution + * every time we call check point, there is a version number which will increase by one + * * \param p_model pointer to the model + * \sa LoadCheckPoint, VersionNumber */ virtual void CheckPoint(const utils::ISerializable &model) { + version_number += 1; + } + /*! + * \return version number of current stored model, + * which means how many calls to CheckPoint we made so far + * \sa LoadCheckPoint, CheckPoint + */ + virtual int VersionNumber(void) const { + return version_number; } /*! * \brief explicitly re-init everything before calling LoadCheckPoint @@ -236,6 +250,8 @@ class AllReduceBase : public IEngine { * \sa ReturnType */ ReturnType TryBroadcast(void *sendrecvbuf_, size_t size, int root); + //---- data structure related to model ---- + int version_number; //---- local data related to link ---- // index of parent link, can be -1, meaning this is root of the tree int parent_index; diff --git a/src/engine_robust.cc b/src/engine_robust.cc index ab33b0f0a..59a5b79a3 100644 --- a/src/engine_robust.cc +++ b/src/engine_robust.cc @@ -16,9 +16,27 @@ namespace rabit { namespace engine { AllReduceRobust::AllReduceRobust(void) { - result_buffer_round = 2; + result_buffer_round = 1; seq_counter = 0; } +/*! \brief shutdown the engine */ +void AllReduceRobust::Shutdown(void) { + // need to sync the exec before we shutdown, do a pesudo check point + // execute checkpoint, note: when checkpoint existing, load will not happen + utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kMaxSeq), + "check point must return true"); + // reset result buffer + resbuf.Clear(); seq_counter = 0; + // execute check ack step, load happens here + utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kMaxSeq), + "check ack must return true"); + AllReduceBase::Shutdown(); +} +/*! 
+ * \brief set parameters to the engine + * \param name parameter name + * \param val parameter value + */ void AllReduceRobust::SetParam(const char *name, const char *val) { AllReduceBase::SetParam(name, val); if (!strcmp(name, "result_buffer_round")) result_buffer_round = atoi(val); @@ -91,24 +109,25 @@ void AllReduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) /*! * \brief load latest check point * \param p_model pointer to the model - * \return true if there was stored checkpoint and load was successful - * false if there was no stored checkpoint, means we are start over gain + * \return the version number of check point loaded + * if returned version == 0, this means no model has been CheckPointed + * the p_model is not touched, user should do necessary initialization by themselves + * \sa CheckPoint, VersionNumber */ -bool AllReduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { +int AllReduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { // check if we succesfll if (RecoverExec(NULL, 0, ActionSummary::kLoadCheck, ActionSummary::kMaxSeq)) { // reset result buffer resbuf.Clear(); seq_counter = 0; - // if loaded model is empty, this simply means we did not call checkpoint yet - // ask caller to reinit model - if (checked_model.length() == 0) return false; // load from buffer utils::MemoryBufferStream fs(&checked_model); + fs.Read(&version_number, sizeof(version_number)); + if (version_number == 0) return version_number; p_model->Load(fs); // run another phase of check ack, if recovered from data utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kMaxSeq), "check ack must return true"); - return true; + return version_number; } else { // reset result buffer resbuf.Clear(); seq_counter = 0; @@ -118,14 +137,19 @@ bool AllReduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { } /*! 
* \brief checkpoint the model, meaning we finished a stage of execution + * every time we call check point, there is a version number which will increase by one + * * \param p_model pointer to the model + * \sa LoadCheckPoint, VersionNumber */ void AllReduceRobust::CheckPoint(const utils::ISerializable &model) { + // increase version number + version_number += 1; // save model checked_model.resize(0); utils::MemoryBufferStream fs(&checked_model); + fs.Write(&version_number, sizeof(version_number)); model.Save(fs); - utils::Check(checked_model.length() != 0, "CheckPoint: empty model, model.Save must save something"); // execute checkpoint, note: when checkpoint existing, load will not happen utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kMaxSeq), "check point must return true"); @@ -586,7 +610,7 @@ bool AllReduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { utils::Assert(seqno == ActionSummary::kMaxSeq, "must only set seqno for normal operations"); } // request - ActionSummary req(flag, seqno); + ActionSummary req(flag, seqno); while (true) { // action ActionSummary act = req; diff --git a/src/engine_robust.h b/src/engine_robust.h index 7116764d8..32aee1f2b 100644 --- a/src/engine_robust.h +++ b/src/engine_robust.h @@ -20,6 +20,8 @@ class AllReduceRobust : public AllReduceBase { public: AllReduceRobust(void); virtual ~AllReduceRobust(void) {} + /*! \brief shutdown the engine */ + virtual void Shutdown(void); /*! * \brief set parameters to the engine * \param name parameter name @@ -45,18 +47,23 @@ class AllReduceRobust : public AllReduceBase { * \param root the root worker id to broadcast the data */ virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root); - /*! + /*! 
* \brief load latest check point * \param p_model pointer to the model - * \return true if there was stored checkpoint and load was successful - * false if there was no stored checkpoint, means we are start over gain - */ - virtual bool LoadCheckPoint(utils::ISerializable *p_model); + * \return the version number of check point loaded + * if returned version == 0, this means no model has been CheckPointed + * the p_model is not touched, user should do necessary initialization by themselves + * \sa CheckPoint, VersionNumber + */ + virtual int LoadCheckPoint(utils::ISerializable *p_model); /*! * \brief checkpoint the model, meaning we finished a stage of execution + * every time we call check point, there is a version number which will increase by one + * * \param p_model pointer to the model + * \sa LoadCheckPoint, VersionNumber */ - virtual void CheckPoint(const utils::ISerializable &model); + virtual void CheckPoint(const utils::ISerializable &model); /*! * \brief explicitly re-init everything before calling LoadCheckPoint * call this function when IEngine throw an exception out, @@ -359,8 +366,7 @@ class AllReduceRobust : public AllReduceBase { // result buffer ResultBuffer resbuf; // last check point model - std::string checked_model; - + std::string checked_model; }; } // namespace engine } // namespace rabit diff --git a/src/rabit.h b/src/rabit.h index 635e3ff87..5659798ec 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -93,21 +93,43 @@ template inline void AllReduce(DType *sendrecvbuf, size_t count) { engine::GetEngine()->AllReduce(sendrecvbuf, sizeof(DType), count, op::Reducer); } -/*! +/*! 
* \brief load latest check point * \param p_model pointer to the model - * \return true if there was stored checkpoint and load was successful - * false if there was no stored checkpoint, means we are start over gain + * \return the version number of check point loaded + * if returned version == 0, this means no model has been CheckPointed + * the p_model is not touched, user should do necessary initialization by themselves + * + * Common usage example: + * int iter = rabit::LoadCheckPoint(&model); + * if (iter == 0) model.InitParameters(); + * for (i = iter; i < max_iter; ++i) { + * do many things, include allreduce + * rabit::CheckPoint(model); + * } + * + * \sa CheckPoint, VersionNumber */ -inline bool LoadCheckPoint(utils::ISerializable *p_model) { +inline int LoadCheckPoint(utils::ISerializable *p_model) { return engine::GetEngine()->LoadCheckPoint(p_model); } /*! * \brief checkpoint the model, meaning we finished a stage of execution + * every time we call check point, there is a version number which will increase by one + * * \param p_model pointer to the model + * \sa LoadCheckPoint, VersionNumber */ inline void CheckPoint(const utils::ISerializable &model) { engine::GetEngine()->CheckPoint(model); } +/*! 
+ * \return version number of current stored model, + * which means how many calls to CheckPoint we made so far + * \sa LoadCheckPoint, CheckPoint + */ +inline int VersionNumber(void) { + return engine::GetEngine()->VersionNumber(); +} } // namespace rabit #endif // RABIT_ALLREDUCE_H diff --git a/test/Makefile b/test/Makefile index a3f6b07c7..a48fcd77c 100644 --- a/test/Makefile +++ b/test/Makefile @@ -11,7 +11,7 @@ else endif # specify tensor path -BIN = test_allreduce test_recover +BIN = test_allreduce test_recover test_model_recover OBJ = engine_base.o engine_robust.o engine.o .PHONY: clean all @@ -23,6 +23,7 @@ engine.o: ../src/engine.cc ../src/*.h engine_robust.o: ../src/engine_robust.cc ../src/*.h test_allreduce: test_allreduce.cpp ../src/*.h $(OBJ) test_recover: test_recover.cpp ../src/*.h $(OBJ) +test_model_recover: test_model_recover.cpp ../src/*.h $(OBJ) $(BIN) : $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) diff --git a/test/test_recover.cpp b/test/test_recover.cpp index 215177f20..761226889 100644 --- a/test/test_recover.cpp +++ b/test/test_recover.cpp @@ -70,18 +70,16 @@ inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { // dummy model class Model : public rabit::utils::ISerializable { public: - // iterations - int iter; // load from stream virtual void Load(rabit::utils::IStream &fi) { - fi.Read(&iter, sizeof(iter)); + // do nothing } /*! 
\brief save the model to the stream */ virtual void Save(rabit::utils::IStream &fo) const { - fo.Write(&iter, sizeof(iter)); + // do nothing } virtual void InitModel(void) { - iter = 0; + // do nothing } }; @@ -101,7 +99,7 @@ int main(int argc, char *argv[]) { int ntrial = 0; while (true) { try { - if (!rabit::LoadCheckPoint(&model)) { + if (rabit::LoadCheckPoint(&model) == 0) { model.InitModel(); } utils::LogPrintf("[%d/%d] start at %s\n", rank, ntrial, name.c_str()); From dcea64c8386f2ea3b5037aa3787de618daedd5ea Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 1 Dec 2014 21:41:37 -0800 Subject: [PATCH 040/531] check in model recover --- test/test_model_recover.cpp | 141 ++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 test/test_model_recover.cpp diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp new file mode 100644 index 000000000..a7f4d7677 --- /dev/null +++ b/test/test_model_recover.cpp @@ -0,0 +1,141 @@ +// this is a test case to test whether rabit can recover model when +// facing an exception +#include +#include +#include +#include +#include +#include + +using namespace rabit; + +struct MockException { +}; + +// dummy model +class Model : public rabit::utils::ISerializable { + public: + // iterations + std::vector data; + // load from stream + virtual void Load(rabit::utils::IStream &fi) { + fi.Read(&data); + } + /*! 
\brief save the model to the stream */ + virtual void Save(rabit::utils::IStream &fo) const { + fo.Write(data); + } + virtual void InitModel(size_t n) { + data.resize(n, 1.0f); + } +}; + +inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + const int z = iter + 111; + + std::vector ndata(model->data.size()); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % z + model->data[i]; + } + mock.AllReduce(&ndata[0], ndata.size()); + if (ntrial == iter && rank == 3) { + throw MockException(); + } + for (size_t i = 0; i < ndata.size(); ++i) { + float rmax = (i * 1) % z + model->data[i]; + for (int r = 0; r < nproc; ++r) { + rmax = std::max(rmax, (float)((i * (r+1)) % z) + model->data[i]); + } + utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank); + } + model->data = ndata; +} + +inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + const int z = 131 + iter; + + std::vector ndata(model->data.size()); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % z + model->data[i]; + } + mock.AllReduce(&ndata[0], ndata.size()); + + if (ntrial == iter && rank == 0) { + throw MockException(); + } + + for (size_t i = 0; i < ndata.size(); ++i) { + float rsum = model->data[i] * nproc; + for (int r = 0; r < nproc; ++r) { + rsum += (float)((i * (r+1)) % z); + } + utils::Check(fabsf(rsum - ndata[i]) < 1e-5 , + "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]); + } + model->data = ndata; +} + +inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { + int rank = rabit::GetRank(); + std::string s; s.resize(n); + for (size_t i = 0; i < n; ++i) { + s[i] = char(i % 126 + 1); + } + std::string res; + if (root == rank) { + res = s; + mock.Broadcast(&res, root); + } else { + mock.Broadcast(&res, root); + } + 
utils::Check(res == s, "[%d] TestBcast fail", rank); +} + +int main(int argc, char *argv[]) { + if (argc < 3) { + printf("Usage: \n"); + return 0; + } + int n = atoi(argv[1]); + rabit::Init(argc, argv); + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + std::string name = rabit::GetProcessorName(); + test::Mock mock(rank, argv[2], argv[3]); + Model model; + srand(0); + int ntrial = 0; + while (true) { + try { + int iter = rabit::LoadCheckPoint(&model); + if (iter == 0) { + model.InitModel(n); + } else { + utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); + } + for (int r = iter; r < 3; ++r) { + TestMax(mock, &model, ntrial, r); + utils::LogPrintf("[%d] !!!TestMax pass, iter=%d\n", rank, r); + int step = std::max(nproc / 3, 1); + for (int i = 0; i < nproc; i += step) { + TestBcast(mock, n, i, ntrial); + } + utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); + TestSum(mock, &model, ntrial, r); + utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); + rabit::CheckPoint(model); + utils::LogPrintf("[%d] !!!CheckPont pass, iter=%d\n", rank, r); + } + break; + } catch (MockException &e) { + //rabit::engine::GetEngine()->InitAfterException(); + ++ntrial; + } + } + rabit::Finalize(); + return 0; +} From 2c166d7a3a86c6ef62f68c58961581a595b14db5 Mon Sep 17 00:00:00 2001 From: nachocano Date: Tue, 2 Dec 2014 01:19:36 -0800 Subject: [PATCH 041/531] adding some initial skeleton of the report. 
--- report/.gitignore | 8 ++++++ report/rabit.bib | 69 +++++++++++++++++++++++++++++++++++++++++++++++ report/rabit.tex | 65 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+) create mode 100644 report/.gitignore create mode 100644 report/rabit.bib create mode 100644 report/rabit.tex diff --git a/report/.gitignore b/report/.gitignore new file mode 100644 index 000000000..aac9702f9 --- /dev/null +++ b/report/.gitignore @@ -0,0 +1,8 @@ +*.pdf +*.bbl +*.blg +*.fls +*.aux +*.gz +*.log +Output diff --git a/report/rabit.bib b/report/rabit.bib new file mode 100644 index 000000000..b95d108b9 --- /dev/null +++ b/report/rabit.bib @@ -0,0 +1,69 @@ +@inproceedings {paramServer, +author = {Mu Li and David G. Andersen and Jun Woo Park and Alexander J. Smola and Amr Ahmed and Vanja Josifovski and James Long and Eugene J. Shekita and Bor-Yiing Su}, +title = {Scaling Distributed Machine Learning with the Parameter Server}, +booktitle = {11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14)}, +year = {2014}, +month = Oct, +isbn = { 978-1-931971-16-4}, +address = {Broomfield, CO}, +pages = {583--598}, +url = {https://www.usenix.org/conference/osdi14/technical-sessions/presentation/li_mu}, +publisher = {USENIX Association}, +} + +@article{DuchiAW12, + author = {Duchi, John C. and Agarwal, Alekh and Wainwright, Martin J.}, + biburl = {http://www.bibsonomy.org/bibtex/241ff9a4754f77538c4d5076acebbf772/dblp}, + ee = {http://dx.doi.org/10.1109/TAC.2011.2161027}, + journal = {IEEE Trans. Automat. Contr.}, + keywords = {dblp}, + number = 3, + pages = {592-606}, + title = {Dual Averaging for Distributed Optimization: Convergence Analysis and Network Scaling.}, + url = {http://dblp.uni-trier.de/db/journals/tac/tac57.html#DuchiAW12}, + volume = 57, + year = 2012 +} + +@INPROCEEDINGS{Zinkevich, + author = {Martin A. 
Zinkevich and Alex Smola and Markus Weimer and Lihong Li}, + title = {Parallelized stochastic gradient descent}, + booktitle = {Advances in Neural Information Processing Systems 23}, + year = {2010}, + pages = {2595--2603} +} + +@article{Dekel, + author = {Dekel, Ofer and Gilad-Bachrach, Ran and Shamir, Ohad and Xiao, Lin}, + biburl = {http://www.bibsonomy.org/bibtex/20603ddb3c1f66680cb38f01683f544c3/dblp}, + ee = {http://arxiv.org/abs/1012.1367}, + journal = {CoRR}, + keywords = {dblp}, + title = {Optimal Distributed Online Prediction using Mini-Batches}, + url = {http://dblp.uni-trier.de/db/journals/corr/corr1012.html#abs-1012-1367}, + volume = {abs/1012.1367}, + year = 2010 +} + +@inproceedings{Low, +title = {GraphLab: A New Parallel Framework for Machine Learning}, +author = {Yucheng Low and Joseph Gonzalez and Aapo Kyrola and Danny Bickson and Carlos Guestrin and Joseph M. Hellerstein}, +booktitle = {Conference on Uncertainty in Artificial Intelligence (UAI)}, +month = {July}, +year = {2010}, +address = {Catalina Island, California}, +wwwfilebase = {uai2010-low-gonzalez-kyrola-bickson-guestrin-hellerstein}, +wwwtopic = {Parallel Learning}, +} + +@article{Agarwal, + author = {Agarwal, Alekh and Chapelle, Olivier and Dudík, Miroslav and Langford, John}, + biburl = {http://www.bibsonomy.org/bibtex/2e0e1d583d5b30e917e67124acbe3acd4/dblp}, + ee = {http://arxiv.org/abs/1110.4198}, + journal = {CoRR}, + keywords = {dblp}, + title = {A Reliable Effective Terascale Linear Learning System}, + url = {http://dblp.uni-trier.de/db/journals/corr/corr1110.html#abs-1110-4198}, + volume = {abs/1110.4198}, + year = 2011 +} diff --git a/report/rabit.tex b/report/rabit.tex new file mode 100644 index 000000000..ba1eb647d --- /dev/null +++ b/report/rabit.tex @@ -0,0 +1,65 @@ +\documentclass[10pt,twocolumn]{article} + +\usepackage{times} +\usepackage{fullpage} +\usepackage{color} +\usepackage{natbib} + +\newcommand{\todo}[1]{\noindent{\textcolor{red}{\{{\bf TODO:} #1\}}}} + 
+\begin{document} + +\title{\bf RABIT: A Robust AllReduce and Broadcast Interface} +\author{Tianqi Chen\hspace{0.5in}Ignacio Cano\hspace{0.5in}Tianyi Zhou \\\\ +Department of Computer Science \& Engineering \\ +University of Washington\\ +} +\date{} +\maketitle +\thispagestyle{empty} + +\begin{abstract} + +AllReduce is an abstraction commonly used for solving machine learning problems. It is an operation where every node starts with a local value and ends up with an aggregate global result. +MPI package provides an AllReduce implementation. Though it has been widely adopted, it is somewhat limited; it lacks fault tolerance and cannot run easily on existent systems, such as Spark, Hadoop, etc. + +In this work, we propose RABIT, an AllReduce library suitable for distributed machine learning algorithms that overcomes the aforementioned drawbacks; it is fault-tolerant and can easily run on top of existent systems. + +\end{abstract} + +\section{Introduction} +Distributed machine learning is an active research area that has seen an incredible grow in recent years. Several approaches have been proposed, using a parameter server framework, graph approaches, among others \cite{paramServer,DuchiAW12,Zinkevich,Dekel,Low}. The closest example to our work is proposed by Agarwal et al. \cite{Agarwal}, in which they have a communication infrastructure that efficiently accumulates and broadcasts values to every node involved in a computation. +\todo {add more stuff} + + +\section{AllReduce} + +In AllReduce settings, nodes are organized in a tree structure. Each node holds a portion of the data and computes some values on it. Those values are passed up the tree and aggregated, until a global aggregate value is calculated in the root node (reduce). The global value is then passed down to all other nodes (broadcast). Figure \todo{add image} shows an example of an AllReduce operation. 
+ +\section{Design} + +\todo{add key design decisions} + +\subsection{Interface} + +\todo{add sync module interface, example of how to use the library} + +\section{Evaluation} + +\todo{add benchmarks and our results} + + +\section{Conclusion \& Future Work} + +With the exponential increase of data on the web, it becomes critical to build systems that can process information efficiently in order to extract value out of it. Several abstractions have been proposed to address those requirements. In this project, we focus on the AllReduce abstraction. We propose an efficient and fault tolerant version that can be used together with existent big data analytics systems, such as Spark, Hadoop, etc. +We compare our solution to MPI's AllReduce implementation, and show that the performance difference between the two is negligible considering our version is fault tolerant. +\todo{improve this} + +\subsection*{Acknowledgments} +Thanks to Arvind Krishnamurthy and the CSE550 teaching staff for their guidance and support during the quarter. 
+ +\bibliography{rabit} +\bibliographystyle{abbrv} + +\end{document} + From 40f7ee1cab2affd855758f374d6b89c7df5c4bb4 Mon Sep 17 00:00:00 2001 From: nachocano Date: Tue, 2 Dec 2014 01:49:54 -0800 Subject: [PATCH 042/531] adding simple image --- report/.gitignore | 2 +- report/fig/allreduce.pdf | Bin 0 -> 44692 bytes report/rabit.tex | 13 ++++++++++++- 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 report/fig/allreduce.pdf diff --git a/report/.gitignore b/report/.gitignore index aac9702f9..8c9da0f78 100644 --- a/report/.gitignore +++ b/report/.gitignore @@ -1,4 +1,4 @@ -*.pdf +rabit.pdf *.bbl *.blg *.fls diff --git a/report/fig/allreduce.pdf b/report/fig/allreduce.pdf new file mode 100644 index 0000000000000000000000000000000000000000..17d846d2a6cb47ec4b1d6eb569d92ce0dc1d7817 GIT binary patch literal 44692 zcmb@tby(D0*FOq_BQk&(G)RbacZbpf(lNBuARyg62qK-*FeoYAC=E)NibJQ;Db0|- zFR0IbKfdSvo$FlJ`Nxr&9V4&HVhS1HwbaL1kxR z1rZUUVplY`g*jVN@d96|QL)QBvvD?e1U_w^I-5(Go7$O~Q;CT|oSYrapFW4Uqbo%< z-fW}74>}ZUPrk?SS#2D8_lE^NOtOIY4O8aUR*;I-X3Lr2Qg7?iK#V-KI>{l(5f+h% zD&>n7(ah-~Ymiid@%M)v7|N^W`xua7MCr3ZF6`Bz`}Xdtv)h=b zPNEhNb6d0D!~*}j>l zI~PY&b0;dkUtdVt**XKiJ5gPV50I6Tx!JR)kL=v3^f-Voc=`FLxOq4YAb_}j?{hit zkC;^)?M&6povHMI&7@?g*yW!CTR!^zDf#376BiZd<>ml2vcNb_&ff#6*maamtjtZFuZGD#=cE$&^UFWv zU4He4yx&$J_-n0SLiw`NFo<^JfIvlH;E zq}_9SJ6m8ZMBxA3EH@ghYahtEuon2yoWnoC>B#r@%U!NHxkI@72ro!Mn>T(jAz zt#fmFdi1l5)rez+*x~&nyTEIl;eZ38>BBu)_eVV+7Kp&3Meq@3){4c+cegpD59zx7x7#!3w4oA6{2TDZTJ=hzKzOL}( z6lZFXTJ>r!8bOyd`UCpP7RrmGPN-(YK{)EKRbt`Gbf^#vH4T~qx^KsL;b>#K+|J^` zQs8_v0$?*LBJlaoU~xI7`(9|QZTXt{xDN$S^}Poie$fW3uYzvIasjogxq(B4A7*e{ z6D@K!G!tDQfH~m$?2VEer2aJT?^~ipM!z``d~nIm<+2|bs1W0<{HRR3ZVg@Ezt)4# zT$~@uXO5F7@P z`WNGJTB1>R>PldgRa0NbrJC2lwnVGZa$pl%68NQke~4MWzjweI9b){ZoN7dViprWe z2gq#jFV>DwgMMu9UTvb=TuPp?D+v)cD0uUavPqbzNV?i^KlD7xz2ms>mt~*IHp(;I z_v58jojs@-)|!*B#xIgC*0f=rQ?22H`u;`AJl>=(VEX@MNk1zQXNqE<%}XW9rwSWO z5FB9(e<{VX_FuO2;PCS&3@`MXh0=$#yH4AJoe%$5z9iVD&SYEJ 
zcjvAiKDjOnbrCMB>C#{1%YA>7O2;~+wm$-0BvP3DYva?SPVodAsRKxXhDBWv;X75+ z_7c1_$HL|};Eoi-^e)@1AAhfBxSd6L*0q_FpUTTb=#^D|>n{tE9N^f?Z4HpJdS%S=n6U z@$VQmzJ`mY$yoKbCf~K;`ToS{Cr7inj~+gL%ir{&+>7?F{;FgkdzKr|%5+FMtR&GQ z!PKn(?K~3V@6GY3DA!u5Ju1}eiz^hreZQqcUM34C=fhSI`r8p65L{^e{|P8teitYI z-vMRjWcu)5fZ{SI?zTfIe)xfX=A=qA>-oh#UW?Ws+duD2a#)UYw1#g1jC8oV*b z>(h#+xAT{w%@66Wk&!u#uGn%>ZED*sK)1`I8W&S!ZpF~-Bswggxz_3_kE=Hg1XTGv6_=K?WNsAlkZc_SqBO^mtQ{yt81(d#nCU!Ix1u;pMA30wcyVpO2B%&h>Lzx~ApTeArv2R{uU%xVM6AMq_zErv{=Zp z?1y!ixF|35<2T|nT+MtoPYGvod1liZy$aRVzOu?c`?RbpjB1`{eH{yimr5j4H-C8> z%)&eU;9l30KXE@3^cr;BO|fa@xkAee3M98i$88yYNnBOWS6IlC=(p$E`|z=os}P*p%qKboVKS!*6*Mp0gly>5bJS?!}G+<&X>_p%HO9XMz7nZXYAjv z9qSmJtcJdGSy@beMp>mb=&}`I7uVApMG;_&=Dda%S#hRn8hg_$?@uUEfj@>58F{OU zmAc>=hhbw0JZ`Nx73MFEx7GJOD|UQeLv;e%EK*PDb>U+{?k^AVpf1cG<^H+1oJN#)jB+gPA(}PenoOer3l(-(IJ>@qOb>PFRiN z;Rr(%-5uln&kD1``*esOJK96&U|T!u=L9#aWPqTq+dga(^)-I6BFY;VR+>cAv_IX$ zPvo}=$3FN;Dt^vSr0tUG)RcPPAoDv;_(LANYr$r3$b`bz!IwEp&q|_S zCoVFAUjEsh)X-4+#kz3er-fKNZ+Ye5^R%V~n(AonX@{CsRJ>7nOF8j1FoSMnJ5}K6 z-`d-074`EvM=dSg)!(Q%o#3Bln0miXQ~Tca$&BL*&t)1T&!1(hUrHOQYYb1P*~v+l z-kP6q2bZ4BW&F-Z&tgPg^ib zYu9*;QDJa4@Xmj$;iNBy;$W0gu-;YTkT}zSWek=0&uUN5QQ1xQa{&_yw+XSWoCZ(p zqqIj1rcudW)edPTn@DxU#gU&=tifpq-sFKh@~O>>{h9qh>HkQc>K^z3Wec5rfbmqJ z>dt=F-Br)TI)v-kpj=OSVY zU~APwD>p*z1*?TyuV%T59AKE7Mdh{Rd34rIFPA5M&xw1=76{(hwl0^jlOm}8cL;pke3Im6{(NSIy zXvIB3^v!=fBMr18f3DxSB3oy&{+!hkEuS^;e-J|h%J4aW>fw~ryf-;Gb#Ca$_y=sC zpvx=paH|?0rp)u`wiR^9NMKmn;L3bPQ+fU9kNp#9+rci#dmF1=hj!_avtMvFVV=cAL$AinkxUt-_Y|~lDT2?m3oJ0HSeSSv+&LO z+`c|@u--C;T^M^^e5zSLd}6bC&)d?jG)A80Ka#$sjNUClXxpVp$n`p!x%mrrY>S*& z<&Sgu4D~!Az4}hFsJH)!d|4X~>&wLoA-?v$kq{Y4^Z3W0nyseI;pi~F z%E-6r|54GhD*8PAHEe<;+7nG}16sHAwjvAbs6x!WVY+*qx>Mz ze4caimD`MUm_}lg$3+x(Ruu@Msos7b<(b4y=enw%h&9{tS%&Z$SP-6YTa4i*y z*a#@u9WvVYChqZo?ysK)aoO1+qPJ`(*3GT8S&q%~F?`>6z43qW#~%Dajy|(>Q&Yvo zrozIV^JM%xqXpivJ!`Y`l?3G2Ptt{GyI-yojDC`29k`iQX-zYlm7aE_RSO+mVJBm!aW<9;`glcPGmAkLA7w?PJfJ-YsyilZ;K1Nq(>}JFEtmLDwVYcZ*b>?yA*p 
zG<(0Gn*B$d3@Y%5sWDXFI#JU~9x6^*j8Wn+f=~~f%Uefiqsezl@WKiP!z?Y0DY7tq z5rtct&w!VAysq$EwC&Tg00@=v~NE!5@C7+nuyzU;4kl+vqZ))`@JFbsu`9G?;fL z$i=2vUEUm3>aK0{G9}%f4aw5|N7h^_5<7&G-z2!Q5^Z8;vf6nlyzW!H zOZ>)9ZS$_;Uu&vFA;e%)j?e82*9FZ*obel74- zGt+meZ!WHd&FkHkIm-ad%snNi>->vnrF6T=u&}nbFO}S~ zrQib zwZ>vctqjexEXi1WsU`WhKR`mMuo)tn<1tU}&y>B$%X`wbRd9%8nfMbQIQ&6+^+@gH zJ-=ngnWv_9?9=j;-aj9jF$PrUdsPdE6*^WszN(!66txtDVm0#G!^EFww%bJ&<@^?T znJc)a#x||#fl);Y28z;iosAmfHiz|)6u7$NhVbv2%HM^SOyUWm-3k5>+jza`-N6Cf zKk{cK$lv7Y#+Kj19(T-p=9hU`?31cz4c4vWqIhm9X)E=lX`k-hR)LxRvxE~?ee=cnNlw0(H6o~Y z4sSLxAuqof0%ISMYJNCYVfq`DB;b%P$>zoKQ}D2B^~ zqTVw7m=+KFrDeDY!K>r?>+JJOUrX9MSb||@J<$j^W86(vQYQahB;)Ookz!0^i ztY1@=4dNqR$X!}IKaa(Ye)4M^EmPU_3@?MKL*$2aP;6h9xQA%9*|?+RV^H;!gV}HD zLLl#FcxU~B6>tIqJ6UE{hK(lk@1&mHC96L*&kt;DFS!~wob7*F(mEGKqirYhgS*9g z=bCc89GuK`UKiJpw~uIq%&8OJWpiv0&8<#nVOSG z(l4a!C~a9hpC`hgk7BT~i}F(>f5-?)8o?%CU*D$FVDw!QxH9S|mjYIEzDD>?;uvCM zGYfk&Up{wtCtz*wq>tiP#wTF}{6PLu+6x8g9%e(xEGy{NR8oKo!XozXF?h_iEZ+R# zA!;}ay;@I_Nu_y4vWc7PjE==`#8__Mqa+EUAuhq^W1(|jzhwH82-(DMdvsx<5|5`D zBvWj{NL|???OM_;Crba+yp!BCa6@5sa^LIaT6m*p2rm1`gV{T-@Jp^f7lLR{OW^wc z1=B5BOEzHN3ZvS3J--x0bferC>!a`@PSnBBT)p+d&G~v&8ZxwcgSf_TeK^QpIN zk)ok| ztKn{2pBCBJN9LQ49RdMun`KE*H(RTmVUiPN9z3!6=~XXF<6b$iy~j71u&jPC#CwM# z1=(d-@`IqH7@WYcyMo;1ttynPo|5ix^f zaOm-ODmNMVe zJzwkf01?9C)zl6rrqty>zHnI5;EPWS?7G_E2(SLenvyh~XN(Z}-~Edogd-B+t7iP?>kof1$XP zrg4_%OxIi#Pq!8MzO%JWgXdiMc@?^mz};5d9Y|C9O1-tzQX7K)-OR8+w^e~+ls1Sa zkUaG&jgYu>y`7nEeMC^%7BVJGHOO!LKAH)d$5xtQ$-5F&v*Xtn2X)C|h%vE*VMAQ5 zEC1re|2)In;x*-VuzPBN5fjOt7IIsBccN%(Q;AJQLwO|rY@gDD9?kZVCJ~mK>tbQ& z?~rF%>VIZ{;StaILzx`l%|~NzAn*t|p}W(FHmX*;N}#--#j6?FS*up75pgAw78~&T z84)UeY`p)VoTSO#!WAD!o5twki>z5;u>8Ton)q0U9+}d@akyBIrga;?P|cTc@+(=s z00jX)8m_y2}TP<3@|7;+&=ly_wf>cGDZX+6^N_i6Y#97=LJF*6sKm zrPHCRTSaF0Xa)p#+LBjuI;D*W<>+-6z_N6Of()mQ5rGSH?S+jPC|eDo43l3+x^AXR zuX=Uv=#YCn_wv}vf1SH2FTtjabv>zp67A;rH0xP#L#>;G_9oR2etMR0$ek-^#3m5f zslVg7B)qX`C}`#wHL=|KUTzKuB$D8DQC%=OL7ylimwP1LsNz@OVe3wbbc 
zEEnP(sbD3PJAF8%E#EF3xa#$E->Z1RkJbkP-?+l1j_gZyqS}hQ`^^*^jVem{*U1gb z6P0B=QpbSMlo1HpiR@e%%LsSJLKA3ehxjmGz;>niTyV3?VhgZB9kr{d zjMT5KuQwyhdee(^2ke7qTDKxguRe})Cn1dQ`vPs$K&YYkLs5^osJ>;~CMyBvLq`q( z&Ul#ZKMq73)djNnc!-nKS333GtoeD-%a8e~t9VB>a^_KN-eZVc`RQ? zu(xZfcT*i5Fs>AWXe5OZaf~p#NAruENakBk)rNPcSIeZ#8d*wgaiqsiwS1PZX_yy-{`d4ec5G^4@GywNFc5}Yvc3$)2)x; z2Ad7>XO0G_HsOrSgwFXQl}`Y@D)4P4stfavYV1Tg+~)=u+cBpIjf+}C1FuAO)J{mi z_I2C5jNS!-#$7d01N8&`y$z!2!j%>TY8hWunI_8GH0{=YJZT_dGZt9j@Ty@MygJZ!Sd1-|HJb-c0nIPmAsp3{%?+)9{ znqpa;sB3qlJ>3s>`+Wx8X(i2V@njIvCA9%l*w5Kf;ij^R_6P^y^}EokxF;tCU#(A* z^&6333E5+n;Xi6iCN5a~lKZJ+PS#_yR-dS@s1BV~o4t^%0y&XSxFdgcGgzdJ?xEDV z_YefFb_DqS^5OAx+2av$CmK67gVJ*muKyLrE|={;Fza@2G%>)C*Uk9f)P3d%tssu6&}%m+Yh zFu-sqIu$$GBf&yl&>0D+s}+^<4X@Tl=O$Xuf9rjGAqk%=oq?z9}e7@m6u|zd5&Zg_^HH(q{l9jWpTI zR0uTLzZ}Y}1nuuyXqvFVACNp*a&@Qrh37^25~={%x|&hwyFuK{*KzU03nW}V$dr6t z_Q`L&+i=@SgCgMyLnIk85nKCeX%_(gWFof&6vaJjj=pe9y&`bF>CyOeUM${Qv}~PS zl3?ED=4-$T;_*idN)nbXo_t%)xxa4*Sxe=ar+=yL2s()k0qv-(aHZ}!Mu-^D*yZ$^ z8IxcCMb3jv2^U(cz#LpAw7~8K&_2 zkP#aRa2FlXB8q*bZ~@_s81O&VdSUvzCZP$OI0mSaWAxA_yc&k8LeoTQ!azv9+pDnJ zdB=!`uzH2NA}@a>k3Cuv6a#7A@!y*Hx-MuE@7-J@Z`3al^nPW%C1Gg(VPN{_YqjXb?F z`4a39jqZazFz8Os2-n!Av=-l0^|9*-dXqh!aH53(Rd?M&I@U3P9qREnLc~_j=k8w@ z*O~PoGO|uQgyyJtE75>+Vyx`Vw=Eqd-q?02qwBl9Dlc9Q!~s-EfWK;G)(w3k8pz(D zIRKJttf_k*F)lvsG6xO%L-}_iuB7$|Utp(=PAO@CbqK z4{4AU)^-!-w|Ejn_wu^4W={s2Xq1lJ6S*qqQPqGBg19Tr0sATk!%ifbr~o}N9(E7$ z?i9?4PRrd>^UVmml58|+zcM}!X;{){v*B1xDd&-4VGQPTjr`%*KLF&1I4t7IvX@!g ztHKssVg7MhhSG_t2rsdkC*a`%{|7&T%GWyO&zx)4``4APjw;Kt=qEsA{f@)lewf9q zM*;^q8X+I>(U*K!BprG7L3M%UcZs)UoM3dkPSqE^(^c|3i^(m~9&9-5D#r0_pPKC< zys)Iss@{{h$&(SGi0<}5hGi$;8%xMO91tl^W{V$k0&#H)Llt^e>v-vC()l$BuO5D+ zv{Cfiv4AZleYn&~A5m0wGPD07yXJ@fyeM@V|%aUl#=eGYfLXjNRo}a)~ zl~#})0%E15jeyK&w;;~<=&^jq)^c@-uKia(2-G*RnpAvH!@vO1Bu{WbmhM8ep^?c0 zl!ffR>6N>8Tm8GhCtQ>WfIS7jqsL^E6?B!m7$ z@HNRU4BYqhCe2oV;e}?{!9Y3Gd;Fb5H$)$}&ErEPD<-=VS)DrGjpsJ>3nIpT15b%fj5K?|<6@$mXBZtR^e(R4XEPM5Y%nUqdzT+%zK>0lzO^-1TUyZ>98%hhzyiwFpQzJFgp 
zLaJg~f@01>r`sId$hx5UmF^i%PBy2W7jlIE)BOvA1wG3&6Tl8NC2~6~_EV{La5sN& zK?`K9>DWu4KZ7<86jXfI22aa_f0&o4!a)Q_XIp+z$^S09ZM8`zn2)8!TwbyWF_)D) zC{j>lI0+D@fkr5c%3arQNuh_fCDYIp=1Y`7mU*uFW;)P1rNJFeM>25`A}SCX znX+KuYI;Wg)D$mMhL!EXY<>euv}Yg}aJc16v;@+6iPogUW16fXbdyr!;p54Nh7^y$ z9VHUH@~a!Q&1c1@$i`}cEUi`@e82T3i;H(HqXY~dXRQT=_l-Be{sKMZ^`&EeOrIa3 zC*K_9FfVE+C794{9cIu8b4B1bzi7BDn8GOtT1*07%O;XmYZnggU6@nN-qOX?l?qgPe#UDGO~;gSS36)q-wF?e#7i zwtf5O?>9;W4A03)puqXB1q~8dC~N6}9m$;Vk+S8U2$(=l2|anX`Fy{0!!WcCnq^YX zPl`R4wcMd_|E6u;i5+xt2W5Wm(mGw1g)6&$i| z%zbS%RNh|KFlL2xNgW|eeS^C1>xAt-Og^m^cy*aqqe1K&t(wIoc|c4mk)Y<&1$Ohu zE<^tIl)=8*zEWJhE`OsZ8ur;5R7zkok~vR;l9D&lfscn?tP>EtD)6@;ojWILJ_;g5 ze-v5mf8E$x5{FcxtDNvNn$7c94>5js_GpqLZ`-5+LJfihR;sZk)u2@-E&TlLL+DU+ z=eXoee|+St0<7wD3>t}p)6<5*+p2H}Mt|BHy*DKe)}kUC&)ho~pa%(%y|*oce&rTCTCqWJA;`tKd|L4jmDDYaaJUI4J)3TkmBTwtO^y<0d zT<#0YfWI@|dIG6Fou?Of(0tQ=r)~T*m#eP`dzm@F#yErV$gQxCNV=-{4!bORK@BQy zu-W%Dx?LYgDl5<|=NMku#~vAjNL~{pXW|e7m&trsFb`kD>Ui4{Vd-1m?=HKf@0tWD zDrt%GJ2g$dSD|eTgIcOul%FDz7T7=_DthbB_84iU$*!+oS_HE7F_eZ;D1c=O{EKkJ zkH18%|I3=AzO}FO5p>DGV@D`i+E_;>Y|;kcD)0ly^7<^$Qn#T=3*0=wi6K?VdGq-o z&OKPF29*Q&!O=zIv@;>Yf4Yl8>SL#}Kx54QyuoX)i|Vv+S$fgNFflOYz>mPu5jT$J z@cb;ps9mpV_hnEP$YKNjA<1M+l>BuVVcsd6ZRF0z1Y9X_DLLmdlzBl#ZN7YMW6}%D zlE9Ea5~D)ihfigK+LV3KY&AUiu|A49m6pGqC#_Eg^8O|V(BCXCYkbUzE6CYbn;t>30eZwXOCa-4oBV#V31H1Fx_K6 zQ3PvgNaSH1KF8NlwW0u0o|e$xT4R3P|M49#>1g_}iGDju)StBa^7hwrUfgeWe#&j; zWIjl{tumN!1YLVQOkC!oVo-yM9?VhgqQ*~nlCt&HUHzmqL&#&M@o?H}#s^8!LRfCK zG8c0~)6dU7D~L)reFKo64|`sHvgy_w)<}x{95tbsYmV1M6(RX9&{>}QA7 zb$ebmw{o4Cdu>#Y;2X1FUBB)}hx%0A)~k9CzBr|y5s5ke;g_$`$+H?%c^f?M7v`>1- zrCBsZ@cgw2(28(yiYc~B5x()mEu$4)^6(#(+e^QBqkZkt&!o}&hHt&cS_|Dq!CZOr zJO*I{NP4Gx_U%V!bDq@+Hx}eaidqUwo0#Gh4j}7N>S2{1m)FN{99Y7hotq;33~0M^ z?EH3fb~p?P_rcIM2Xnv{+GHl#o`SwuO?&BjPoc*zH^ zfk&`8<4Zb`3@p)91*v@zhDQidc?;r-QX7BblmFO^&2w;R>td09hHci=O(gEg;p1?; z$Ye-%wuDe2e4{%I&qr7Kc%C0od$9jK{1rEpmEPwb{_Y*S1uhZ3M<)FWltzoA} zlvn2Qkiv;j)xnv=DO|s8NCi0kNTR35+Vb9jIoIxY_Pb5I63#021Uk6(ApJQb(_Do? 
zS*P8Pts|T!wz?41)cqP%Ft8aDuPuA?sbiK;f`iL>&`Bink&(mh5K_LW>5PuicRjKW zXu>M26)#+}vK#UpaA+x4g>y1+SQFMJHy?~rpp<7EpBGO~Y@H0uk$Bo%l&QWLuM)rR zR}rO=2?IM@0bw(l2jsl^D{$(xTnu!(jnH^qT%6<|%xxCSKd#I;|AtZKLpWKvXA=n^ z8!f?Sqy#;k{rmAiLQ$!agfhOWszveb@?%3({l#W0&#`*(^f-z(hc+EDpXq0!(L{0nZ;>x1bzpgiP9tj?$nr4E!nair( zY)nnF%2O*Xyu(qpgS>@jL$!WYn@jb|CJP04sqMATb;)1MFi=Hk0C#L=Oe=J|z37Yu zw>9rtongxO51i854l~!<)vry?k?`bvIIrO@thLi?!7{$4GkVcL1JB9=F*1}jx|LP> znZPbGISZlY=d0x#OJ3OLACxslV%a^1&iOyHw6yJKOP@S`cVJ0~CS%=-Zlc(lx{g6W ztaH_bWdZ-wt*evbw_3PgVE^Ufbccq$aqlz;n<&HY97SZ*U_Y^+7=}OMx9`Ata~v@i zfU{1T5eei%EsF!&z#+Z7(d)CoK~Qh+qqc&z*9MdsN1b7XhKA?tc?FvItWJ$qd-ozV zjM2bm2dtCM1#H>yGW?5Vy;38AK85ooaRZ69A=-9g!j_z4n&)Xdc32xXB$qHDHJv0B}D1>S~1?WxDy;bY3$aIZWf}!f8SVK ztts%C6le~zCeRBLUHxEpx?!HE@48^5zfz;XR&s&i{H5K`VZl7HdF|7X%i-tv=eQlC zXOyLhV@^!b??KOZNJs({4_X&b-tNzT+fUSZ+jN*4T$YJG`efKZOyA(SrpCEO0Iq2#HL`;LN(jMKJKl#-UzUh6^sfy%~R*VBF1$Tc_D0?dS(!z5(i?GVi@0Ndx% z8b!vq^7x8Y!+?G=&BwbjdKu1n%Y{B8ETK;0lsPZPo-sw&fHrnXNH7;ySU6zkd=y`k z{f;Bam<-s@Te2cHoV4`xucrcqs~8<&F|e~t7e%Tf^`ComxAP4$ zJA7Vo@7}v}W0JjSXEEyGgZ1m21af-t96&S>LUKivJ?{-xsvq_XmgQs@YnXnj@+Tcr zL;C?C>Vd^UgwbB>QTJkMnm_OMSrI;#YFoySo*iK)8EYK4QSF~8gKxTPH}Os_CB6R1 zhuyar0~Gb*U(c7*dT*-3;|X;55HaP($9Nnw4r~Tid*K|@UZ?3_5xso2wW8>lJlHnE z2MpF*F*6qmzShE4y{SU;*HjgsMP%dNyc<`7|E~i|-KQCz_Sk9l0S8CWG6q|4T+7;SkAO>k8P$E?JBdz6k+A62oUf zM9dHKSQ9amHX+~*#B$fu7h^vYAp#b>3fBPJ^}LJt()W=g~_zH zH#8&&bZ8MVguvOg>_)A=T_Uca1#d5tjBmj2Jk4p6zFYAuLe7HMNyBuqJ3km=12kDoFr4PfwOQWmn>>k6u~Bi5-;MY%aD*{?GtqEO*ovDTvfeM$n9Xq&T16_(= zMEx%hwG3dA-3j4WP$jS8u09>O?=H!0tGS(u=$8EgAAs>^r3A^$OrI6RXndjxjOSkJ zn^UV}eb`BK1h~zazb_~b_8f#sz5Um5hW&PXQ7#e5SeG~WxCb`RGMJ>47yhqSIJd_- zy+w520uvtLzls@r<*ehq9{Op&rvN?VIF!eN*N3s&m6Twa6A^Q8`vPc+vvltCrm0Lp z9hdPbfbTDGzeTpmKL+71b5D!g-HbsGAAwEEJh$o!3|#O7kV?-3- za;WgMCvl;98J<1nL?#Y0dR4@%0$HrOB8KNITUAxuh0hrApP6bH`+LinwXU}A_plb! 
zzfXJ7W# zjw{W|2|;iY2D_rmyUfZmMqSi6URrE$xDwlayYW=(uI98pSycMq@*V29AU!hhF@*b< zPKQfcqEX>EHT@#AvIMX0k@Z=bFsDIJFy^^+?q_)K$R~sAMpkB7Y9NZ%Ebtjqf57Kd z75S>!z}JBeucU}V_4oJ3au|X4CeR(sR}c$cj8|!$j4!)kQq0M{5fgWnMZQ=f%z>G9 zmcUGHCw#!Sw!yL8Qe;7#F6)!fxdlSWK}%9LqZ^A{f($`ZU~URsQhhvg{@#55-ti}yylCCNosaukv%u5xBr zc6rO}wH9d`A65vBzBSfCW1;40194dShTEHH-Idg=A<=}pEO2u%7)b2sD6g9^2$18m zpdCbiz7ZCX&pAs9)B17ksVV`n03s%?humYmywf=sZ8QYq|N4SNEuIU|K1*uEn-)?a z#zpphwl^7nhE4v}7`+qp?PbeXe@Rl7SsnlJTRNmy)k{}mCoW{+xxMkD03SR=w z3XAX?)0?$7faZu+N>HD=KkY7}%c!yWAPXK&9qf`=S9?LwP3jU+aaBI)IHfq7u3)s_ zWesA2*8ma;Y!COXnzm6bPXdpMu3Jfc31C5~{)mPuX?TnstwM@K5i!)u_i4PKPn~1R zdn3oEC<4D86?!#DV)t1Iy-Jv&Uh9ELb$dRwtR4z_OHfU>&>3~l{n8lv4uqMbABoYV zvwNH+iyzoGZmh2n;F)PvzV$a1_l^J@^0130$~gCi%v2l_zJUlx1RM`UB8u_ zr=taz+96rt`|lbbbhuY^CmDc=#rG+?c49QTD?G`Sen)e`OW$5z7&@TAn`O1tO07o%_N*~6mm$>go4%`?sjZ40-a|n5a-DRB6 zLdlYlgv6oaRpv2*X9IgP5t6e!oLj}$mFSq$FI_xoOw_gDZR`?AkWuTpw9+4VqI=O3 zYmiJ=jd!=Dy)>v#sU^dp{n1vt`Vo-qgh4ra1w%iM1E89grHskPRY;=q( z%ig-8JAd`oQPPR5Lz9p?yst3T(NKK{o2|z5&z;@^HD!{po3e+ErM1uZ$X|ZEbd?du z1b{D9G^c_T18zPg4V9J9GK~N6hVfT%H`E%77}z_eaXyYNbYqBqOEnxEl1+ z0l^zzXqVD`79G9Lu1XG>rVDiZDwU3ZQ`*QU$f^tvXfElB+2tyPW*sf032l4JkjD&p z-tiJu^3Ux1y5~Dw$>Q_1Q|JTM2u~4p^D(I z${tx`gDVhes_TvSZ$1D#;WNj>YiOpMedXWwXu1b4-w*x_$J)`33M9}ozO+ga?*vd* z7c6_Xf~8CMO&Es#d(XgxYa?-^XNZNh>WO4z(G}spEb#VDpPnvn|t2f9fWC- zkVY9Kg8Hn6bG#Fl?x~8!<(#+Q42AYD>koQcIJ1!{1SccI+tou3<4}srFxe+8msU6eu7QCj1v!o}$DiPmzziFXc z@M=X|bLc_!0bN6^h!+(pwE23+2FJ~Od{UxcIMPeF3gtVAXjZ!!c1YN?3T3}_bVTTR zb(R1fw8+%Zae(CfoszHaDX4I^o5Ei7&Wv$`t(qM=JLSSBm^*e}TU6cL3jDR<_(}X^}_6|`M9osakE@gH`0Nn zGKILn)sp+2x6JXy<-MPrqwOPIu@fPj=wJ!>@pPQgeXHsb7M4oVqgPZKRQTdvGk2SrK30{A~DlIvUohOpM7J4>gJj80Wc# z0Hb6U;vf0A{idcd^?M#MQDHuu==_jx%-f1?Jkz(=;sa#4%5u;!rl^nzL}FWHa@qj8=+~+k>)5? 
zNuzgWoMgb@KJqSc^v0IxR9W2TyN`kFX4{b>BK$t$gRPs}&TWg$DEsnNX3$43<+ps5 zDNXbJx#{t{$q^AhQU<%4>a?;x;BE=+<9iQXJnYiOx``mP&>D)Xv&rQLcf1C~x!S+| zQ1a+Tv}^z`QxfvpeS+u2%3|tMVQG^Eiw1caCR&@KV@t4~%l9f%GOk&0i!yMudA|(P z#PhYYd9*1iFS5Mbvhx+of;WPRz?xiJEXy{wNT!JeTGZ=F`QFPm0;;qHschB6ADv9r z7gt<;$j*O%(ypL%kX-CWPkRqmwP$(VVU>@&QyR2?HP&LNjd(ZN=997qRiBmG!4p*< zG8ZxR_`G2OolCG3Pgmzg0OpK!5C~NYc*0n$&%O$jRk*@pK!M4H=>8a`D%)B_ zSI}VN-301`0jC7rJhVzudO5-NPrD>%HzVt54I)^zf2>i+_7vsH za&~_G26#!g)qMSgX$;SnFIQprL^D>Ttv(8es4)8xkanwWzM3LgL)7lcrynVD4D_jLq){I0_vC%0^28dXJHnKZlc$X5?rMVsYh8!Q`ZsDbz@|RsB9# zsN*5W3v(^*@Bs`&5UQ^wV97_wg4g53jWx?utdej{K>*;Xuc6PIjypJ)gWQ6b(VaR~ zOinCITx!`jiRK;p_rsg+P9luWRVz4^DSS*30Uy0B@NG#==>a7AVvTi>j8Y7E(p{?1 zXT|lCaW@$!EotXg5;A)|U3D)4)yFUyFMBz1JgN^Sv(Z%UoB{3E5qWJao;(O3cticC zB8%*Eb1`nU&WQ4fKC6!9>gvN}RNuIPiUnp8GJIeQn6^|=?v@1Y9~Bj_zepa$VG&S< z5g2QkBIU>dwN5_#;2H&0D#_UJ0l4){jK^gwM5|0OC(y%2p!#&pmgF@7unP|if^Z<9 z47ag9GF;_|!hkRoVJq&&Mi7p|6@_k=iKhVA>U9bOPTGAOiBha6VJtT7vjXg-cbf}w znWjrJ0W;;Q^)kXxikHW08DQOy%OYXMV|44laS6s&%eoXB1o!F z9&d_NrobBkq&IH$s6zttD(ha-Siw|^ zJ47I0N=F*_yD)!bM`w|nHRwL860maiN+N|R4;9>)F{IDGrjCnE$ zG@ffSw@kzRojqSaQZA5wckJ25xq-8rjGxvfhLhX!sv@QPEce)2IL_SlrwlwF;yC#B?_Gk82T7B4FKP>(BA9zVBiQX%B4oUEB%V&y(NEPm(Sdw zPg5|Ix<7f5a@Xs$>44lp2dM<8V9J^7oMAF0aZ;wlB;<#^rpM6$i^va``g-pVpB}xI z12W-Hl_}59{DK?BxZ5#iyuQ+cdcDpf6C=!#kv%Xm;D8oOCOewvhgop0!@m7e!)N7v z(_^Rik8%q!c9Ua)^u<(J1bE0L*wn~;*dNt5Y!z|u=XBh(OIsV8gky2`Ip!yCx&t1@ z2A^wwJBW8r-fr^#s?oPG^#QBT{OCD8Buou!JgSnz`BQ;S#U0+c`3oi+FYc*o zW$DTXW8UhYa`~!g0lOHjJ~_ci^Eqcs8tfYhu5M)Z@1st)=A8pWlCvA}L>8`}8RV^! 
zZ92kEM>&|y6wH$btMEgsCR*k<` zQ01k}sXwWZ3gB2wOhi#W;0pxca5sV7WcTrkg&cf|^Rnq8nG^DPKJzIr-5!S*F#sF_ zr<1BnP{F1p&1mtqaPRl%5QIE1!*=`0Ekbp`!IZ2hpxS=#b&3@{GHQWgKp7D!9H$_)64DMem&A}64B(Qu3aGCJPT>gj2>l{693VZ;((tBx5{sms9Ht;Yc zabt%VcOePa^?8@4JLuHnBW-Kkl05@X#rZ7{3damDB zKsg?`_4(HyKy&C%qy2*=@eAnrsv+wzh1TnCpoHGL{vqON2OmCK+3^Dc>KfCMT31#_ zO*riN;V%!{j`2@6gX;hT!5Q2J zxr1ciz4y6i-?hH={k!MSu$bxYx8APq>guPSuJ#p)Ji!r_T`)< z>&!=%bd3ik$Z&opfL^_&;$=rB%bp?QiFOoLS`-9-do~}u{Vx=XTj_|FB5;v^h-tZP z@AM|Wt{!%xcQa5|`>J6fKtD4Se#>2=i{FN?M$6X9)=mkQCEBbqW`ky1C|2S05TsxN&az}Ndo@6z4 zm<_Y>s}SB5S=A>32}dH5;)Iw74%5e#Ap3!tX!Kh|#5JID@(~3yf<-?hk28iW9s$jme4OY* z2AphP1xc{MIXAmKCLzhK9K=Fjk(gp%S7&I$;8lfRPTNXPkcmR!(Nz~c+Mub%+wfxr zx0IG|{Nn2MmD36sG^}=J#?v>zwpi;tl#J>HWZ8}e{2qq|AsCie z#yG}|Kfa^kIS2ZX9ys_z^|x8j^I&J<58r9i_`hBJ6#EqcZ4$wB{FS~YkhDb1HKnLR zl_`H|=3znvq!ty*8e3m|V*dEZw~EN-wXF@Zezq~&j4$8i{p1J$N`JN1DQbxrvs%DL zz^>4Ix_2ka9>QhTE>k;RABrJ=$osK+f<=I38^R?l)*Wc@@9@WM2e?Ft-h&-O136jU zQCyqA+DM~J1#bQYlu0q?4!!yZ%`G` zVrE;H-lr&wNy)F?`;<^t&@GfF>k4p}AL-pKH}V9dt003<$+93xBl)R#zMy)C4sJSN z=Nv=9PJLgZw-7lDeskx9_s9n~L4lR&i0 zX-6Z2O%J-eeuAS|gEPST=ge0R*z-TqVJUEZ#%$yzcUNIs7YDMM!+;u$!vZH}PL@F~ zJUDZ%zC=8_5PR--|CDf}axKP&sJsqMJD5r0q@u{wL3|-6DfD2uyUqPEFTOsW#Rp!%j9%6e?tjfg$j1v8sarT;_iq z^M4rs)##H3rfw5c7E%1SLJAijPKepi#F;vkK1gXy|DI@&wSmW+SRO4`snw@Dv1slc_LN-?l0v(cq2c0cT z!pCznhO=SVw_V*byi|zH%^#+fs`Gbtil*e`+s90Yq}CzvL=Xy`W|bc^S~k+#^hL0<#12B@76G0&)P?7uq9SaI^joSwF~F<52!yu-5O@s=FKj;xx?tt~BAD);^jx|m% z6h^tG4*Tao%)N~6pZLC$aOcIKGI`GfKhZh%JlMcsu`%X1}OA-r*+JPlsiUv5tmt zc$vu^BGv{NIx(2`tl0eYh7B~-qc;WAqeiV)BNoNXcxj^50Vo+1Y$tv3e77kVO*nui zCF=AiEr%)#uJc<#0^Xfav18YkP%IEN;B_E?f1UD?A`2YO301KrX$37%8Q&QGbhp(n zPib=AT;DOuk`Q2tPlPEE>co8APgRnC zHY71RjT`62n!c9VI6(U3d9W=ZX8^dYQ(R@*sEa%m2^)J;S>2ZHq;@0A z4A*@(N3GV6JP&Ug4yT9wPBhXl#u&xV6{ja>U&ScAQ6@Qbl z;HbS*8RA3BI__uTM?>|9^jUzXMnFqf0w&Mja&te#y(b4{=J+pjeUv~hYc~t;K`W*g zuw-v9u|E+R+>198q$N0)W^`=alV_Lp`)>-Q#aPGPK^E&=L;nF{L#bn%GSNMp$$|i3 zk2L&4+O|LSS%UTmFkXl79#6_MT^wdtE>?DueVdc?)I|L1e`i?$eQ^As=>Cs7NW4ko}+TlCvdXzU#hETWp 
z%}dysZcV+826dI^o!4V>=5tz-H&|g)Mp)xAcK#d-MtW*zTIZK z8F>p^`cg`L-biX@gK66*$f1BoMDcZ6m~zTr1k>~rWH8%=IVmHF zUE(A)22+pXG-t>8tfg)KLQK-Dy=p!C9a!Vw;j^y(Y2`Q0Jl@Gla&Hk%FpJoPD&c2S zeeS!Sb{DC(zDX<_Yj3$SU^$slP(<#Sc`e|zOq||u!*X-^%?p;??)qG3wKuY~o}Npd z;ePWZkLbddyC!~hV(kbyKe}4lNEbeyl8#CB68o-`zWPnu@Y9zqL+_b5>XILaIMT`nbPhdz9fS4q1y9ZED;ZRR>?v*k;S8Gxzn}nWumWrdCki0BT-%+F z8) ziY4E?F>2(mH^lwUf4iPFZRF>1f?Y4GVZJKyuEdKE>?Kv=qsrfVrA4;NF_c<%Zm@|1 zznu9)C)53oM*THaLJASenwHD1&sVBus+KOm-vn4~sNa<(zebZdCE(XHIizm6!ax3I z*g#*C`|{?JXRz#%{8*s)G3`yXG`v$qj|-|1c#6|msxRfl;bwgmzk=O&R9<}FgCBG` zls@Hk^Q4@kalNN$D6}LbPH@LYd=+_%*N@PoygzGKcx$E3`Lv_r50t+Y}4~k#3Er?&y)UVH1hWF?|1avRM@fY+b&k+LyD0IIc|wL=TAwYnkz2y_-P14C3oH zKc~axLihMqo157+2H#-~91t~@uCnM3@on#K36qNx&8|mv_%B^*sH95vh2{G#M_y2; zs9bI}NP}H_c=v3+?AW!r&@#uEoRM$@ygn9o6rEf?8Jf>y9digcg= zPUeB8Yr>X9r5;8x9e;EGy<(NA>3617@<22i&4>~+!s|$mR;I8m7$=_ivE$dE-ODxV zY5&P1p^0t>=$u_l#_I8ryt25aoG|Xa%^?b1mO?;~kSx38fgXn>Z8?Z)cI#4FwbY6y zj?Mh(IJw)?W4h@m4DV}Io;=-dm@WN|;?j`w97e;8Osf;~H}JdP=If9cW1_Ru#Q#aZ zC^FRcyJh%(<)iBs<}d!~bbee%w-|5luSOO0ua{T;UBPMyN!G44To<=i+TJH z>*~b#PnF)xq|as)r;K0Mo5Oo=&CTeftZvCwl6bFxynJgE6keDPjhjMibr*Ga7zx?d zcPKb*XsWPvzunB0B>kSdP&y>lvTJJ?m)`pRFdhRt1rnmnWq*99j{}Q)6z4sMC0$es z@IQx)*FVjzSko)Lx{jNGj3pn*TwDA9J4si3^OFBY?=Ug)20E#nYSd30y0K&YcbgWb zNI1@?rz$fwrAHD%#6A8y?PiW&`NuH3ALJErUP8AOAL$0xrg zp4G<1UEVojM@C^8P)9T^ib(o{^ees8-v)WiQ9D%SqQZdRqC0QvAt4X@t|zab>$y_* zeActI8-iK9@8Fv(mQzUe+V84$&7_y5ZR2UL%Za4@#;)7;IxxMgr!jBgr>~>JH-jyD zC6OTLz@a9!mxB@cSe&-i-Hzx@_oOo`Hfn7gpUcwrq=yZM!TGmQ&DUVZ_BCa{yuoFT z){V@=)LD60&ge0hOeTqn-4T3(?$58Qr|zLGP73%4yAl%{HRXXfq2jNs>;>7PUQHBW zjO(0QlFHU(+}7}3dVtZ;65#2IB1ZDiZhnKtZcv+6=nwokaW}71d>ll#+phC_H>G2~ zyZsXRRi@)??7NMm9|=1cMW2#;rrE?(yO!?nC-Vn+HrRb^wHmVwSke|cV z10X7on4d`?8aFr(Ri>OZqi!g>W*SpY@jhaKjP-ri_WOn6dvx8O)_1p;M$x9^Es-p! zs$172du0{vz81oJWHIyM0oph|>2E`n$++ZDl5N_iH{vXw*ziBw>a&KLxm#lmr|asi z7n;SqN(gc*qp1$SGWWRNC@Y{Ne&hPn?9uGgcmto+{9V5+Z>aM-&Mf<@yUiJ1ilYY! 
zdM5Haic$(T_CEiEIP$#c`m+*kf6vp!#ZK)OH|zIc?KBGaQbmYGe~4B5=QGUJrO5Kx zhbpDN#C>13D7|q%)EIx?F(LJ`JJ}w#T~CB1XNLVz^LvbK%1Fl&5UZM>U7(iPt&5ih zOO}y|95rNp6Bos=jNJU~eK^y1GKioe)(BXfv8qFJnig5w^WOHq`MnKYb)uMN z)MS<)#)PyS!QePag@?bKL(x=V>;BmQ4%0N1@DWyO9+$JB?Rrd2ff^f8WbX6UV>+h` z3>tt&_&6*F=Rt|XoZF;b-Mrf*Kb7BQ;@BT$^#!G0P9HhE?!7e$6Q1S%P8=WUIM@;P zi;2#)et(JTbZ)cl?-iZ_xo4N>hp{iqiQLNEEu1t%IX$n?g-`1t`jB7PNIu&MMa8Rlnt2GiFs zd!B=nkhe=@^!O@>2Vtj~;9*YaSNe>Ee+Ssn-lhqA4DS9@@@!?7oZyPDf3e3~c<;Yr zk7U#?VaAyV59{9Dmh29NI*NdOd%ugm-IN&d_$4Ol^wBYPiP*f(F7*)sW$fI`5X z5q2e{L@bA$Xt}T5Zej&2cmyv%6#QI=_0aR*&VnH?|Mxrp(Cy@8~9v`KW0(4REKXz@0*k?Cxo@88howXgi*7j%<30!z7-#?V6~P@`^P7H$aQ!|~u>%h5&mYLM4wr;G ziITv17Iim_tiQ&Q7WdubX!yu_>>c;Ig0q03&u?q=!d246T4GJn6Zh}2_<%>r`{o~jwJuFpt5fcpg+??7;si~Tp`Ih(^IP5eb zN$>o>V^e5mVz6|l_RHbju#K!q^&%{@_`c`4;J~NHJwXblwoT}kkr7`aP=&BDbW}c$ z5aGMtSH;D_D32!@<}8ns``!bC(|USZ!i{%IEsF_m^UMQ>tfIei#&NC~JGF28M_P8F zr$n9fqStN&^1;2U8^~Gy)=2|+m$3Km4)b3RU=Yj{YUI0?!J*C+$(aT|I1a$6p8gPm z@+oxon)^AdC25*Y9))~xZ*1T!kp2kg1jxbx5bA5Rjc>yRb`V|eFA+qI*)0eUh7RgU zqe19-@RJdFy>-N-7+5Vg8C3Es^kK@9(F+24SWU;Viw5rP*>9&P44rg3!2J^>Ohsr= za7o}o4rRj}y_Y{SN`Wa}qThjO?ppP6xW(?F^SW zU!Q^LBJGJ(?BuU8+MQX&I+BU3y~K9@f|}_>DxHTvK>a3e)Aun-yEQhk<#MaQ=q(2B z7_v_Y?nF8wsYvqIL}K)uKFIDI+|&O=*S-$VgKK(*VOhS4=k30XhK-7RZ!@xJ3Ij61 zi^BEH)lb&zbF!H72OZ^t_o?+bb*a3!xk`}XrV;+ngllqymx$O_cf*9P6e|_&M(>}~ z5&t_di5dG^L_r(|UAiv($Mzm96_UidmrMvU3pR+jJuO$MQ0L^}RVRy7F zIN2pwP@~o~NDZl$h%#f_4N*PQ*dyGfvK;rF4~qw%RWxm@pDcdBkz)V1 zdsrT#hAAd0Jz1S2z7Q9l+c3*k4~g62@3Vs0hoB)@R>E<1^pth$xNg`k7t<3QA|}JQ znCv1RUc1`>0v_xG{1Cgcn(i_buo+uMS&vv!iw-ck9Sl)pPKvT>%Bf2LOL6+vfsz`8 zyOIBUrs9d8aG__0_zcTK;J!Krm0L6LKO{^2hrApob9bTbTQl;U{qy|@&ETTy66lE; znR=P1jQ=lrQt!}@w_S1g#X=`1_W`I+LhOZ1zY2L$@OCVa5)c9^s0RO)#(l>!kd;zc zeu0QC0H_}RY`opQ>#x2a&`z|D1`x&S8>U;i2cK^h!es;M-%0-e!i>Fg*-om>S4wTSjN&r;Vsb&dQR^Bud4OfP)pKlX$+_~!xET^KxMK#p>qkP@7Y0PW zBcx_DEh*a{;gfo}WJ=ru8Nu>FlmoGxEcn6r2f&X+h)b`&=0@`9N*UI)XK z?6qrt+Ix}OTu$fnq(2FdhRE+opAC#NhLxS8L5R@Rfyxmb+-CFrna`8KB)kkFF@dZo 
zBkh~Ll_aOHuFs?b49?Eoa*3K5i`7f|mO4#5dT}+cR3h<$dz?ms z#88aXV9JhVDh@_4Y`z0;~ z7Xu8ViU{kV_EWF_@;KOG?W7Vqb`@W1v67Dqq%8-J=$!G(T*Wi_IPT_uz33uJd7i6H zi;M5+@bZk~dDc012u_f;XvXgO`$7ON@QLHb&*3w9cFUb?gQX8U>1vzVG2Keq7gY8L z#6FE~0c{LDs+D`~KQw#CDZRs*7(R=V1@^W=avE=n%2_axkQ1gS<3gp*j2!4rmYzdG}iez1{PVWT3BxDK)rfKHdM=RZ&1uDm51>*$e z`@BlM-q}}HI*>#=!}q*QXR2{mEKYZk7rv^%6(jM5D!Ofr3QPV(rCn4s9@Y2Dq4IL+ zko+39C&}smI#p!&O7uucn~&NVA$~q{;>J_|F%o+RswqUYq_vfYSw@!u$r$O6c>Y+t zpU``9z7Ev(T}T0bTPv-glI}Ak1N4-`OgFbv7F+Ffp2uPW;13srvAUDZIjOh`H0Ar7 zsN)|&X$m@{dR>ZVG@gO2JS*CEZOIc9?_|5tw+*%xfe9PByC?f=ttDfOom)e8_#8_g zs>JdjSnr$Jrh-=)(q8eDumnFQ`$gA^^mUUtnF|qcE`I$@UEV0R5szo7-6NF5lkCM$ zQZ?%a-BN>Xwk$40%+rd=kV(AiGTL(6KIJpxNZ3OeCWWPyL@rSFNnfT_CNKwnO#E=M zS$c-%&<}^W;F#MQy@;rxtG|yghGqc4+UU*Rf$`8V25qlwxAc1A_Ij{5hebh_M)I+( zwDL=ow;@rof*`RDa-IxT3$Ido1yF46UWu?YyK;=kdT&*pI|P4; zC!@k(o=6O1Fu@7)6i~Ohg|_h3ky5hb56s|}+`M+YNlivD=zXGkUn%dKb;KSz;h(kw zeHqGPF7RXT88;;KS$@XaAxO*F(}K}%@3lIWcL1WN!9)wB&R3-9EM7!z`%Jvl;d`Q% zMTZrzYE@icQac5ym7Ahz-a>oM{_F_scg!?vbQc2fhPxN=(96r^lIw9 zqpStNNJ52Ecqx6jvWXpAQOhL&=UHDWt*_@w%)~PyJ)@Nw%1$2h&sB-%8o8}d%%5AE zl}E@PPe7!VX?zq3i>1;68Xo=`+m-I`T#^}X-%TwK%h1&I(5)MLj)$@JPFB!`Ty(lF&@Vrs> zoUaCBywhrl!!mG9T6XFK!m$zBalVnh3In^20iQ5`^|^M=e{B`C!S@ef}+v(3Fsos-vHA zz;|bcAF@j66K*2Z!|I4rx7PKkH9-q+T+8129}MDR6{SGu_N=c&f!7FUB- z$2Y2jcxe}pe61j+b6Ej3&J8?B6WP_~N*TjjXeaX}%BAn-G{44ouL~;|`PYhc@;aoK z>`?4t?av0<4unc%PU4AdlzqEsQ)%zp>F`(22PL}S$nCzdSRnn}WHB9!AIP(imoD#U zCy5c&a*pQ)DRy2+T#_>bHz^m+rY%hnO+0JlkpXo-(}+R`$V}oLiHF!b`@Maemuu&u zFVX;rVfA@gazlHOa55TGqyZsuVZsoII@cfnStpSmvWOf0d`ZgK5Y&B>d%cC$Goi;1 zWerwiuXDH*j3qssAph1FvONCYlqG+REir6a#0t;0QyBvN9OV8kL z9gVtEwu|LvjmAq`zsheaGEk-pTI}FpIWBCsv8xD(GV;*70+0Uo8j6?0P1%Q^O{KN+ z{^GWcu2V*@qdC%`#7Trv#(_ zPi<^$D}yKlF}ESSI%;3k>Z4T8%v}?PF#JJn%a2<0Pp=X{q9s}$y4q_B+4J$MgRv*p z=ejVGv>Q^{fG1V^XT3*3cGeAC!s4I{!t#~(ega2^Jv(JX*U?@_GXPeEwdY<{O**?+ z8{KOAL*$0O4kvC%rqFCgmQW6)aE7fCD3UZpYazMRNZaVum8Oewwga{P(W@3`*sS;( zF&H|D=fg1mnMUC8i&i7Xf*PaQIv$GTY7!NmBYWAFwWiH3@)Fedr_WJ5G9XwDD!W*u 
zE9x~*VX|@jjPNqk6!&NH8Jn}t@#U;c@?lruJFsMb~r6CgnjMox{9NXokgD*N8-{J~GH72l{lfK($xdMqAKlQ(RF; zj`D`5<&9wE4ZC-2DhNw%NH;>BleoX&io(h^1%C+VEdAy-Q}vM1Ut8oD%>?4PsS*|JHQ_D9!0XUt*S7&#T zpK7F?uZ_|8md015vXveu!vVqq+32``x4zqFP7`m~@y+*q# z@|bjN+A(yE7rHJ^wK3)%UAIp-R@}wCa6I?W#1up9bwn1j6oNuR=I6peCcsY`dZ2q$MmDaqPGD@7HH$q;O?Yjn|c zlpb<*UE?~kU-cT9-^`qJ#XdNB{+)743#L4uNBdfDX8p_rV}U~g2J&$02aIhwI43*- zj|!6QuhTE^rkLQx^mnl`*%^HNdD4d=yg!%fH#Wi5(p}jAY8wSoA*dSF|VYU z7Db#Eqmgr$a*Id3F{2l+cSmD7!&#eokXAj=>8EHnUAE+p-H_T0Qqo6;y}vEExlXri zqP`?S#(`j+e}xr8w7VA5pTxV=xyHVzeJ66XKy2l4k|I7@$0}$uo2Hv*yv#$Wj_cU~hBnu9A$P%^=uTnj(%yBQi+<@v1mBl6P&cG$ zZ?v!lao#WU(MFLa>0^^y*(tuw2a*jO!HO;IlX&mCL^Hz{Ad6y0_5(}tF@;b=`!`z^ z2xkK|y=m`8pCgbPW~a-;wM5gDbXu;r)QhA1b`PoC?LD6EO;7#syT(;dEHJ2y9<3wu zbV%EDeIDaR+b?6cWvo$4>ve(ztbHVM7TzO!wmI;T!wzH7^DOHD87&edz6FhSH>CE% z*1|HcvJj>+FA3k9#~;4+j|BqB#k3a74+Z&|+_qe{0%X_9uN=N!2y_Z-SqIgjSOrT% zaI_uQ=OeDi#b2K`Pr;spB^a3RTTukF1s$*>(1qd)wr|*2LKA>pZeHtG#G5F_D{Z1V zyR8NGx{6}gNbDopEp2rjFnfrga_OBb7I!h{u$5PtRg08%7Vot_e;pV2Vomg*5xiMS ztSN+y-l~JJGz`7^)&QGi(DQW7&Fq8k3Ig>>_G;;w9{Pb~W&S$z=TftJx?-=921`eW zn`)e^(ls`vE%dbeBHq%fHo;0QkwN`)>_ImAEPY|Y5&}nlqxhlp)09T4n;CsBW?_iE zoaVYinI1Mz`J4Cgj$79BVO1OP>sOTBisEix>&>27jh`=Nn{Bx{D~k{ktgrnGHio?! 
ze_rE|mx^Y#Hs$*l_D{l0j?M8f^jo*+a5i^W+JR=xbyEw+`vRqv#|5Z?=?$;^L57jV z3+=c`ZN~){gH`RxKT!x7aIsrJwfP>Ny)wSGds~XNp?D-#&*ew*&9hyW z4^}2a_IXRO%Dg~mm^E4@@*rn*-!0v@FC4fR5Ui8w~iiVB>vFo{x#t(-y3pkLUZ z&h;?^1q$z^qByH-Op7~GP%j(6PD2Ht@GB1L{`-;V8sj5gB#?TwYmRk8frAJMEo}j1A zn&HSo5^=fwG~4DpD!*`kVPvpQ8eS~AJch(?t?w&%puB1{-tir7;v-k^W(k5 zm=YFQNV8mRkAq0g_b`t!&20&|!=j`y#0Dw^5>n1`??O~ulIjMy(8ry29LK;_ax~+2 zK;UjJm4xx|9|V#YSamI9i1B3;Ej<I zHxe&>+M*5k1drGh?*$hqnT%=7#5H%Kj$_bzAkV^WWC;`gD@l0^914Q8J^O9zNS3f8vaCOi^~9eY4#m*eb9QOEON)FnTBGGW52ngCA_NYJnv8pdF)<1hZcTG;1& z{Lta$HmC8a7qfY2kQXfe!gylhO>}iW)6B=w<>H0%pI+l%JO-kxt`pbz5-(LuNA67p zzqx>%DRW$wi4HvlUR2{Pk&abv2{-KBC{j|hgJsVSLLmF824|l@)ZDQDeuqGgVgDIV z0f?9Sh!*xz4bR^47zT6bdhS4Hy``8ksXE;N9Po}R?^jLJAx|lmJ}e3KXkhxUW+^O4 zXMj%CgQ=f^KZf}Kxj^S?Ww{K1tfGl4_26StH_LzAr(8nyA|@$#4r|&nf%2FHGCnuOmBP@y^r1+kEjEvJ2zAowYK*840#Gq|a6 zi?GA-AKM~HU-wq*z=6JR9zU(zRO>YLsf6UhQ*D|y&!7I@R1*fXwZ@}*hDWLFw={c= z^?N2mo+41`fk%BfDyV9>7dE{U18ht#S1vm``W13s!Zsy|Ah)h(-Qw)LWvG1Gak?=A zkL%hDyeqNuX01+F)fc|cpQ^)8wx3n-628KG>lUw{{!PfZ%&ifUJ3)^<)9y*X^~rsr z=&ee!>#GRJDne=Ce&bYMRiw>}kVz)W*DR6Hj-Q6-SfIX=u&RBziR7==X7e72M1|lM z;g^Rk?||R<{nFJ(Hu`95@;;RnRT%D}r&sxKG*CHp>Gpc^Y}suZOXaPyy*O^M1#0s9 zY09o~x9c!9U$?cmu{*H2=RwRSI|pDg7%Q^r;pnPx5V7xLcn%HtLc{(vVWZa1_rCG&ChrJi0uy^q83r82Jj{4SdUmi-t)zB7 zd>YSjdCw2h9iyLMpFdNcnEe_N-PWWpU4=e)!KydhCVzN{dX)KO`>E;NWV;7d01trw zvpN4JIgyEpCbKU#d3sND;hD;`qo#=9#b7p-Q~$=T6kXC9ESrwPv&(+9*AOfQ&7*jy zi~6!ih!R{rxxUdQD`NFFQB9 zSrtQLPDcEoX*gvrSf%j-h|(HPjaiMz{CSU4iHfHOXnKtPR?Ul!TLfZg=D5*&J0NOb z+et)tD{yOc>mhuKi4VEo*XPA*ppSzd#TOj4R)j{Px$$K2FA;p!G-CLUg4cRs+LxcX zsiRa^K>Q+U`X@ms5Ag-*soQZX-Xa!wR*^pDq85iaT0Ew(b1B`H-Yd2=bI3Bgy{f`A z73?>L+2tX39FG1Zb(}=k2Gb+VF8*s$bRF|am{{yO^Y*CN?$)+2BhaeH@cMOlptC2lZSXlP78>N0%;JtBT}PY(DR|=!T&#IO?riXv|%P zUpuS>><)78V(z69FA{){t)yA3#?(L}Ppy^hlXndW=Z1M&8c{FKuK5(oo{FHe^E4xY z5n(fC^r-f%@1}pynNJ6xygW;u3~t2*y+=>qKfFMx|bpM~Gj*F&~9)hhSHX87Y-HzoZRzYeYB$q?B 
zUrWMmUSDl4eSG$Vt{VuRL_uM4pm_peC63fUKK6`u-+DblkC`UVct!s-R1-w|Yzm#Q0NwZ4Z&H@qnror~T{?)8>aD)DBDU6G8rng2R_0@d1k5}cze5ntPwJo34#L)-Jw5=t>o z@Q{$lYc=pvYrJYSO?`o8EL8l`U@fSc%$bvF-Mf5Al|{djhs@X7OWr1)(Qsw3?sHG4 zFGn`TlHjXi8gYQVwlhZryIIE@|Ja?YSt(zfnedH4%ODz&d^L<^Z_cwvU&UFrshrOY zUkK{0J%7>e%K`GNhe($5HkyU(0vOP;T#}A$j5{=UNz(gHX#~QWDfR^iHncl1ep|oU zs*a zW$WYMMUMn0l#Z_PP=Bx6Kljb&koQzR>|DE6Tm=kmL8mW7(hdM}>7(n1r=Rr5Axpx( zf=7Z;V*URvxCX;{A(QGOF2Q$bs`L=L^_xh`u)dED?h)tTU3wmn%!&uyiF? zcBtQ@_+E&qWx#6@YrL#di#iTApt-kxR_+VBXxv_0cymN(G;9NE%>#UmSpk z7^z#hh3EOM@nxmn4AHp7Po=O9&nC#lw9-62D?O)SDW>^V5DcYG5x7XR^r2`xwnOa))Lo>pi>QEj0OG@brkPaX5b;Rh^Mya&(l?SL z7mvP2K$v@GLGHj`us12}a5)y8KU+QVI2({$!i(AG10h~9^qugxUiBP`w1%mj_^xdp z)FR6D5Psu^&aRNJ(b+!PNLs41J z!Pb1zGMw$IPa2pXiB(w{zb^7p!f zf$-$-BaE=OFc;Pl$-9P^GP2D%>cbMuvhCZ$H2>D85u&x!08KM#7V9eh0PPt0x$+6P z990LUYQ4_mp_JO!HpYr&F zpa2i=Ub~GRi}MBj4{1Z1d!pTE8JQ3T^2-l3UyLhyCdW_xz`|$0#lG75l=!Ofh?OvS zz4_ELIeEHCYUBdaB{8x96`T9)_*xkIwD^|A?Mp9uZ}d#gpFSfMCuohg^||sbi{6)B z_Aczf`2f-6^0V~e7&%+oYa#2w`2@)&6=!YLZa>RBXn6vzfuv7=HDZ9uPI@T@ zPv5YmjOF-~Zt$y0|M6z-!TAE2i=q|+VRBw?t{qWl{ay>}^H}J?$%HKQ7?mufvwMPO z-X5k_?MrVo1z_*c{WGfTwjR6hfArjDCumPZ>-O0|ltWevC1ag8Vj|#AnQQYO z6Zb)?o63EM-Hlv~enmZ3R39_RF=-vTH*7tvTzw{Q1X4$RRQL^UHp>BR9yHz52ju?U zkr&W|wDc&9(=>{!vF_89-n0DX)d{rJK7j$AthnA&Ad~hnG_RfYkj%gPq8CN@(7@U5 zb>yB1mB4uKX-W@jyGrn26JT_mC&@90)QB2%nAY3o|K`e=UbV0P&jo-X1=utB^K`C9 zeo#1NG}z*~m(YHj9M9H>l`131g{-YKahi&MY6L<-Jwh z*$)}^t({R0=Z0s31N;PY?pL^O43M>#l`N1-15;jmVJNl4?AE&Q*x3yQB?uFi!r>Gr zqxYn|%k7Brlmv1TIbx;R2KG)Mv;F?(r-Sv?d)*_!SE!ZHFt3ZjA7@?{*t}wBbZe2+ z`A+;T+1WCT{vA~&zW9RF=*Af~sL`f;z+tCc5pPflx&26f zsTJ*a!q;>6;K&R9dhXY7-FvzxPGNWHZ*fYkA|)ZM9{uC2i#YiyC$r~>^Zby7Ub7tg zvjlQ=4{3F5JD+|28Jcf0jf4myXm=o4=gD=AIxgV()M`Ix&f^!-(zMNS8B;v)<&WCUW+5I{dILPdVwA#8Y zj44!?LK^y-$+>PYoJp0yzxT-^l`g-~xqg)L?qVEIXY9e@K+97+5SA%1RX7k=0JXX0faa{Rxo`;^1!-%I%f zZa6FHZ|iMLJ`;WqD!a2ykpg%zxXD?6BAgyDzW1zf?l=S6QAC z;Iw?E<=|0>EHHcm#R)4%6~uG)586$TpY_ep8rwr|ETwWX7B&Z-rFVQe~k10>&AKT 
zRn)Rm(20l;uZ3I~59+PEp<0Ao&6u@!1ApB?dGcS0>vw$gTaG#x7yomCe=yfMANcGd zA~YQLRCQTTS9uNBx2|S19Hwt!zlw?dqcGQhDl93fsw5;SNn`%j*2V0fYw`Y5EhRI1 z3s*}TENFh6G(0pMlD4j9&h~c?yr;UW%S-*=Iqa!%99%23d{(k6G*8YaqOQ7@ess0p^DwlS%XgT-Ba-H5u8} zd4LT%q=ffyA?d1D7c{+AH#uzq@ttghQ@spA>TuP84|txbL$JHmZtxhtuj>?n1Q#A# z2OhX&ZjK~>CWA0`*&>>lSfJ2Tb+XGI@UCf=wM~`5W#Y#F6CJOG*KL+Ae zgs&P^-K5+%NWYH0F>{AUViMfyDbwiaS&9-ICqbf7{~AN4?MH4|0@B@&2MBOzU%LdG zov|a&1ufIyJo7A8B9J~Rn&jb4CRnYm|1iJUuhKX-;bQsP`D!WLQ)%d^8Y`NUIH1fS zP8R;Du3e(#r=1dtogLkYKbGfh(;zB8jVsX~UQo?_hV7v=NkEz|$aecXM+*_$$rHU~ z%Bt3HnsC*2C=#nn-&G(qa0ke6)e9+yRH3$ppSzZ^7%?Naw{2bE({0#U*7>37K@Pv# zLIqC~`*xvP!g<8vf%J-mJZ7@1dHH!SW%~QEu;Yi`U7h0IfPsQm13x=iQ__LN`~R!7 z>x^n@YueJA0!nX}(2D{g353vl6+#iDg^7pngO`0@8sR2QnpoA(- zKzbKYK%~AwulH5&yVm#PJL~*Np7msA_MSO&X3si1Cv4HwXfcT@G{U(qOg_XsPTZb4 zG-z-th*esMg6(FS8>6!i@)R}~F5(c!6ZN{x?%-$N;Y7`tLcFG=0eQ14tEUwSPT z6mYb#$4z4$=`nRth1X<$=X9*Sw?Nhfw(QB!D^0}b;wUgrs39TZK;jFwXE=kwS4k!S zr$U3Z=}GRBh-(wOD3d-{Q5-%+ts*&0ZyT{fF4qVThdp_tw}GxI&+ogbg-5( z^_?a?AkjpK3+1g1dZsLu!Z_v}x`(Ro={YPZudAsT$}p0&szt=nCn$gG5zJ>5ycRWA)6B^l{+X&dNs`{IX2Z*ry1>B)b`Q)hvdl|6(t|_3-O5D|mOnKYdZy${C4Kf$ zj981AZLF?kM?G_!+eE|Oz{!8@ilna!Hp=X;RMj`od4DTKK z>n+x3k@9=PD)l;?f(D6Capgq_DhT3u65q($Yk9L;DBHkG_%i@V z0lEYw!z5urMz}7ghU$m`Ot(h)y7G0kT;ntO9s}lu4{Fma;W%}yE>=P-5uEZRW#&zB zVo|(X@=8je06b|Z#V-XZo*{(f3DP}!dQeSl)TG>G5|UkJmZ>>a*{l$zqmi1GjJ}Ib zMMt1-PfN8L3DhTqBvhS)7bq{4^$Fziu&egxUhk`})UT8sbr=P5X+d-L`xBR1x40K^ zTT~7L4pwu*bDVSRa}}M&SA+TFOvLC)I;<} zGHbh?5XSx>}>3&>?x2oDYjIP)HkVfsSA+yN@q)&7o?VRmJ2UzfXNR(y}jfp zP!R>oEG}8g6YEvX881>Ow#%S0~42QYw?x%({A(5f{-Gqy7sKH4;B{BG~Yjgy_WB|k$wHqxAQa#QRY!; z%;U_+3u_ll#8xhFTZmYYnY&jmm+ieCZAo=Q*kRl6xX##BbUz)Re^Fc@HtE~AhLuZ2 zl#Mgj8Pwr-Xm@z`1@|Q>6vF+(akO8DF}}ieq1eH*D~nUr-J8)f6heVQ<^4K&lW7*+ zBHd@YvuRGVY_JT7v3mDzPRv0;~ z-ak*LM;EUZ)t;1>xWJC5vALU4dO7}bX;s2k^t$VMo}#Q`gkp>$LxXQa5msUsbx3^Z zyEp!2Xk%e-Xdg~0L;8$rkvfM|lhlktkp@I{j?9PjbrVrj-7Rj3J|w^O05h@bftqia zJeAaG1k>I%R`CmvXM(HOt4;lH`umZJp*7UiTI#f8JNi1bqGFi!nEf>0Xtin#YUyhP 
zYNTlBrI=r^sF>A^wu}ZmRbb}kU<%~-^)C3@H?9{xvpP*Z^GO_ReZOLL)ceI;K^MLx z4%uYkCHfZ!H`gTnaRfti!Q91?Eo6<~9O+PEUHE>&LfiR;m`%j4!$CZ0O6c>@UiFj~ zM1-AAi4K#_3^82g#Ydm#$j>3O1xay9*&^VA-hy{7p_RLp{i*_}$g=YYu!0ay;x!~2 zs8-wQGWf}qAxpV8{-W?T3&*+#_l_=ZYe(}x61Z(aQ{no2!b!^^qtoc^+4-&6)LD-% zt~X!a?BA4fl=Aa&YVbX}bI>I9$&I!+?0t##SdD=nqN#RjyJ02P9pJ7#veBj47307$ z=@B_GVD!YOtijf~Vtu|%wMNxw3MbZWMSG?73e%OfamR{c*Rd2{o8E17*L37GT;M~( z%A^JM^0)D~JE3d1+@0KgzmL9@ejmn{YQLC2L|fR5^pt!=6wkf=ob|b??(%Ab8OqAb zmd+*CKDH8+{~V`V>Z&!Ex16&a!XBGCk6R9`T1y*k9~)|%bPwt$iXmyGQ2dLLvp%Tf ztD#%5ihC>S!_>3a_?4{X+D4}!WRUur_GUa@D@-f5y*burP;Oudnh#BqZjMQ4E1&Ud z_dNW#tro~ov}6d*uGuA(;el;Vx9@VVr=OM|xgLovToO9W*m>Dw8EKjM{=@r-mpRfd zffoZOmxBA#xVpK-lpY=5*oxkHm|D^4q=%31W_tVxmf?5vvuaz|W>f@Axa?(&Vr}JY zUs_37$wXI3N~z6ejd#HqZvVhErO2rShLFRiVR37B4rAW`RhNDrxq8!O>BYJ`Zs}^p z;r*H7dktjRg+0W+#KcZ5cJAJB8V5-f`Q9h-hQMQ+4+=}ziNjus0y-O|w*fMT&WCoZ zGg(5^aMAQarQ*ZE-66*U+}7%oAzOUKU8mSP)oK#Q$=^!hugUNQ`16{Tn#)?nTKMR1 z(c=N-Tdf;i(JB2iVT~e3FTW=D7G>P@+s8Sf-?VJF^{zZr(w)d}fbG5CabK`sobC5> zS$~=rIZ@Q0v=efexo@}C9>25---Rb<#3=dQpgQ>MFX-+#KlwHE%l`kUiBHPVAGS03{!Kcmn|~IyGT%A>pQ1L*bkcL|5~E^- zr*RIm62M$5JhHXa)WoWWgT1(oTqiMiO_w=$X;_qhRu&pjz(Fy#^s*2h7y>0fAW|M7 zq0^Sqqug4Cs*z6fGHu#dp0sC*H*LwR>fXE0VHnE&kJy!46+z zK-7oc2wwOr4<;@w+wN(jw*zi3^%xr|_IF*X$Ul7H+~}7kB`zXP!9G6c-`g&vV8Ek9 zb+1Qsg{hdbo?O0u*ioVig(P9^NSreD;Hc$wjISt8KzZY*Bx1d)B#-IDANg#pB5s@S zKhCYOtEP*XIKOkLAwp^B#TMs=UKz5qMh6u{wi~=F#ZyyZT55ak;4+aZnj%FY_}Mn-(lkN5%sZ27hCuc znd)UWA3EMuVfpYaF<-2GW8S;jV2WU+%HsNbvUh#5P;eIWXT8gA9wh}Tqa~>pMl^?@ zhjKQt9U|^^$<0aJU9`2nnQUFmx^2Bnf2IKv#f#P=L2P|@Li+sk#+XHcdvcuV$*J@F?r3z#|`-&oVG6=Nif;_mm$i ziamcQj1|I49A1pXSk&?!@*|QP;>F2eb?dkcf#JYXB#L1?65%}PmLVl%V$c~l%QBaA zP=D}To}-Mh2KiTF-Yjlom1^hR=|e6YD~7tlRsThx*d{3}gz#u&8u?J$rPk0Q470^p zM&eUr>5jJHExtTo>&J8VBmHI5BZagh)}!Dp?bBf5s`UC@n_a=OrzG1)i3~tl*^llQ zE(&yjr_*G;%rv&BF^_uQA#RdS(q`!t`M#I#WV$?3T39MVb)&Nygc#HcXL<)=!p{jy z`&Hwbqq6xMZu8z-RGHXs(C{5U?f%J1J^Z*?ZpF;mDxU$nM!((2K6+=vfbb%qtSE}@ zTHN$Pw)QM}yoqetdep~`&mqYR%Gxs`tQL;GY<^MnO 
z)=8%MzLp0QDfe_Y5*#Y1t0CRvOQ2gWs82O;QF#!NWbxk!-4~&C9M&##M zK|ebY!s@ahpd>^R1Ox$LU>T64B@ie?IR9b(B%WAj=WIuV=xfmKNtSs!G4`*j5o^Vii0=xNidkW1Q2`<0!SEpdJ-c1B#H^n*YR-l zB!v5q&+3GLmxnk=L(w1^5ZVqb?I=UAS;pQ$1_ef=?4b}e6be(M{=X?dE|_Z=LO6d9 RB0`wJAk+c^8gNbO{{Y*J){X!G literal 0 HcmV?d00001 diff --git a/report/rabit.tex b/report/rabit.tex index ba1eb647d..a075a37cf 100644 --- a/report/rabit.tex +++ b/report/rabit.tex @@ -4,6 +4,7 @@ \usepackage{fullpage} \usepackage{color} \usepackage{natbib} +\usepackage{graphicx} \newcommand{\todo}[1]{\noindent{\textcolor{red}{\{{\bf TODO:} #1\}}}} @@ -34,7 +35,17 @@ Distributed machine learning is an active research area that has seen an incredi \section{AllReduce} -In AllReduce settings, nodes are organized in a tree structure. Each node holds a portion of the data and computes some values on it. Those values are passed up the tree and aggregated, until a global aggregate value is calculated in the root node (reduce). The global value is then passed down to all other nodes (broadcast). Figure \todo{add image} shows an example of an AllReduce operation. +In AllReduce settings, nodes are organized in a tree structure. Each node holds a portion of the data and computes some values on it. Those values are passed up the tree and aggregated, until a global aggregate value is calculated in the root node (reduce). The global value is then passed down to all other nodes (broadcast). + +Figure \ref{allreduce} shows an example of an AllReduce sum operation. The leaf nodes passed data to their parents (interior nodes). Such interior nodes compute an intermediate aggregate and pass the value to the root, which in turn computes the final aggregate and then passes back the result to every node in the cluster. 
+ +\begin{figure}[tb] +\centering +\includegraphics[width=0.7\columnwidth]{fig/allreduce.pdf} +\caption{AllReduce example} +\label{allreduce} +\end{figure} + \section{Design} From 2fab05c83e3dd59d1734ce6a1f249717badbd927 Mon Sep 17 00:00:00 2001 From: nachocano Date: Tue, 2 Dec 2014 11:07:07 -0800 Subject: [PATCH 043/531] adding some design goals. --- report/rabit.tex | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/report/rabit.tex b/report/rabit.tex index a075a37cf..8dd4b4508 100644 --- a/report/rabit.tex +++ b/report/rabit.tex @@ -29,8 +29,8 @@ In this work, we propose RABIT, an AllReduce library suitable for distributed ma \end{abstract} \section{Introduction} -Distributed machine learning is an active research area that has seen an incredible grow in recent years. Several approaches have been proposed, using a parameter server framework, graph approaches, among others \cite{paramServer,DuchiAW12,Zinkevich,Dekel,Low}. The closest example to our work is proposed by Agarwal et al. \cite{Agarwal}, in which they have a communication infrastructure that efficiently accumulates and broadcasts values to every node involved in a computation. -\todo {add more stuff} +Distributed machine learning is an active research area that has seen an incredible grow in recent years. Several approaches have been proposed, e.g. parameter server abstraction, graph approaches, among others \cite{paramServer,DuchiAW12,Zinkevich,Dekel,Low}. The closest example to our work is proposed by Agarwal et al. \cite{Agarwal}, in which they have a tree-shape communication infrastructure that efficiently accumulates and broadcasts values to every node involved in a computation. + \section{AllReduce} @@ -47,9 +47,22 @@ Figure \ref{allreduce} shows an example of an AllReduce sum operation. 
The leaf \end{figure} -\section{Design} +\section{RABIT} -\todo{add key design decisions} + +\subsection{Design Goals} + +The design of RABIT was motivated by the following needs: + +\begin{enumerate} + \item \emph{Distributed}: machine learning algorithms are inherently iterative and computation intensive. Given the vast amount of data they can work on, it may be intractable to perform all the processing on a single machine. Instead, we want to divide the computation into different nodes, each one would be in charge of computing statistics on some portion of the data, and then a combination step would take place, where all those independent local solutions will be aggregated into a single result. + \item \emph{Scalability}: we want our solution to handle a growing amount of work in a capable manner, i.e. we should be able to accommodate to data and computation growth by adding more nodes. + \item \emph{Fault Tolerance}: we assume an environment where failures happen, either machines can go down or communication failures occur. Given the computation intensive nature of machine learning problems, we want to be able to continue operating properly in the event of a failure, instead of starting the process all over again. + \item \emph{Programmability}: we want to provide a clean interface that can be easily used by programmers. With few lines of code, they should be able to have a fault-tolerant AllReduce implementation. + \item \emph{Re-usability}: we want to build a library based on a few low-level primitives, e.g. AllReduce and Broadcast operations. Higher level abstractions, e.g. Recover operation, should reuse those basic building blocks. + \item \emph{Communication Efficiency}: closely related to the \emph{Scalability} goal. We want to send as few control messages as possible. We also want to reuse existent connections in order to avoid starting overheads. 
+ \item \emph{Footprint}: we want to have a low memory footprint while running as well as provide a lightweight footprint library. +\end{enumerate} \subsection{Interface} From 0a3300d773e46858aabea1b0d41cfca55781313e Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Dec 2014 11:20:19 -0800 Subject: [PATCH 044/531] rabit run on MPI --- src/{engine_base.cc => allreduce_base.cc} | 4 +- src/{engine_base.h => allreduce_base.h} | 8 +- ...ne_robust-inl.h => allreduce_robust-inl.h} | 2 +- src/{engine_robust.cc => allreduce_robust.cc} | 4 +- src/{engine_robust.h => allreduce_robust.h} | 14 +- src/engine.cc | 13 +- src/engine.h | 31 +++++ src/engine_mpi.cc | 115 ++++++++++++++++ src/rabit-inl.h | 123 ++++++++++++++++++ src/rabit.h | 96 +++++--------- test/.gitignore | 2 + test/Makefile | 36 +++-- test/test_allreduce.cpp | 3 +- test/test_model_recover.cpp | 2 +- test/test_recover.cpp | 2 +- 15 files changed, 355 insertions(+), 100 deletions(-) rename src/{engine_base.cc => allreduce_base.cc} (99%) rename src/{engine_base.h => allreduce_base.h} (98%) rename src/{engine_robust-inl.h => allreduce_robust-inl.h} (99%) rename src/{engine_robust.cc => allreduce_robust.cc} (99%) rename src/{engine_robust.h => allreduce_robust.h} (98%) create mode 100644 src/engine_mpi.cc create mode 100644 src/rabit-inl.h create mode 100644 test/.gitignore diff --git a/src/engine_base.cc b/src/allreduce_base.cc similarity index 99% rename from src/engine_base.cc rename to src/allreduce_base.cc index 556b71e08..ca63f5c1c 100644 --- a/src/engine_base.cc +++ b/src/allreduce_base.cc @@ -1,5 +1,5 @@ /*! 
- * \file engine_base.cc + * \file allreduce_base.cc * \brief Basic implementation of AllReduce * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou @@ -8,7 +8,7 @@ #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX #include -#include "./engine_base.h" +#include "./allreduce_base.h" namespace rabit { namespace engine { diff --git a/src/engine_base.h b/src/allreduce_base.h similarity index 98% rename from src/engine_base.h rename to src/allreduce_base.h index 48d38aeb9..5ddf27635 100644 --- a/src/engine_base.h +++ b/src/allreduce_base.h @@ -1,5 +1,5 @@ /*! - * \file engine_base.h + * \file allreduce_base.h * \brief Basic implementation of AllReduce * using TCP non-block socket and tree-shape reduction. * @@ -8,8 +8,8 @@ * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ -#ifndef RABIT_ENGINE_BASE_H -#define RABIT_ENGINE_BASE_H +#ifndef RABIT_ALLREDUCE_BASE_H +#define RABIT_ALLREDUCE_BASE_H #include #include @@ -275,4 +275,4 @@ class AllReduceBase : public IEngine { }; } // namespace engine } // namespace rabit -#endif // RABIT_ENGINE_BASE_H +#endif // RABIT_ALLREDUCE_BASE_H diff --git a/src/engine_robust-inl.h b/src/allreduce_robust-inl.h similarity index 99% rename from src/engine_robust-inl.h rename to src/allreduce_robust-inl.h index 1eae685cc..cc9943282 100644 --- a/src/engine_robust-inl.h +++ b/src/allreduce_robust-inl.h @@ -1,5 +1,5 @@ /*! - * \file engine_robust-inl.h + * \file allreduce_robust-inl.h * \brief implementation of inline template function in AllReduceRobust * * \author Tianqi Chen diff --git a/src/engine_robust.cc b/src/allreduce_robust.cc similarity index 99% rename from src/engine_robust.cc rename to src/allreduce_robust.cc index 59a5b79a3..83b6a5fc8 100644 --- a/src/engine_robust.cc +++ b/src/allreduce_robust.cc @@ -1,5 +1,5 @@ /*! 
- * \file engine_robust.cc + * \file allreduce_robust.cc * \brief Robust implementation of AllReduce * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou @@ -11,7 +11,7 @@ #include #include "./io.h" #include "./utils.h" -#include "./engine_robust.h" +#include "./allreduce_robust.h" namespace rabit { namespace engine { diff --git a/src/engine_robust.h b/src/allreduce_robust.h similarity index 98% rename from src/engine_robust.h rename to src/allreduce_robust.h index 32aee1f2b..d9eee6d25 100644 --- a/src/engine_robust.h +++ b/src/allreduce_robust.h @@ -1,5 +1,5 @@ /*! - * \file engine_robust.h + * \file allreduce_robust.h * \brief Robust implementation of AllReduce * using TCP non-block socket and tree-shape reduction. * @@ -7,11 +7,11 @@ * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ -#ifndef RABIT_ENGINE_ROBUST_H -#define RABIT_ENGINE_ROBUST_H +#ifndef RABIT_ALLREDUCE_ROBUST_H +#define RABIT_ALLREDUCE_ROBUST_H #include #include "./engine.h" -#include "./engine_base.h" +#include "./allreduce_base.h" namespace rabit { namespace engine { @@ -70,7 +70,7 @@ class AllReduceRobust : public AllReduceBase { * this function is only used for test purpose */ virtual void InitAfterException(void) { - this->CheckAndRecover(kGetExcept); + //this->CheckAndRecover(kGetExcept); } private: @@ -371,6 +371,6 @@ class AllReduceRobust : public AllReduceBase { } // namespace engine } // namespace rabit // implementation of inline template function -#include "./engine_robust-inl.h" +#include "./allreduce_robust-inl.h" -#endif // RABIT_ENGINE_ROBUST_H +#endif // RABIT_ALLREDUCE_ROBUST_H diff --git a/src/engine.cc b/src/engine.cc index de58f4c4e..24ab1e588 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -10,8 +10,8 @@ #define NOMINMAX #include "./engine.h" -#include "./engine_base.h" -#include "./engine_robust.h" +#include "./allreduce_base.h" +#include "./allreduce_robust.h" namespace rabit { namespace engine { @@ -37,5 +37,14 @@ void Finalize(void) { IEngine *GetEngine(void) { 
return &manager; } +// perform in-place allreduce, on sendrecvbuf +void AllReduce_(void *sendrecvbuf, + size_t type_nbytes, + size_t count, + IEngine::ReduceFunction red, + mpi::DataType dtype, + mpi::OpType op) { + GetEngine()->AllReduce(sendrecvbuf, type_nbytes, count, red); +} } // namespace engine } // namespace rabit diff --git a/src/engine.h b/src/engine.h index 1c040a9e4..873c02588 100644 --- a/src/engine.h +++ b/src/engine.h @@ -105,6 +105,37 @@ void Finalize(void); /*! \brief singleton method to get engine */ IEngine *GetEngine(void); +/*! \brief namespace that contains staffs to be compatible with MPI */ +namespace mpi { +/*!\brief enum of all operators */ +enum OpType { + kMax, kMin, kSum, kBitwiseOR +}; +/*!\brief enum of supported data types */ +enum DataType { + kInt, + kUInt, + kDouble, + kFloat +}; +} // namespace mpi +/*! + * \brief perform in-place allreduce, on sendrecvbuf + * this is an internal function used by rabit to be able to compile with MPI + * do not use this function directly + * \param sendrecvbuf buffer for both sending and recving data + * \param type_nbytes the unit number of bytes the type have + * \param count number of elements to be reduced + * \param reducer reduce function + * \param dtype the data type + * \param op the reduce operator type + */ +void AllReduce_(void *sendrecvbuf, + size_t type_nbytes, + size_t count, + IEngine::ReduceFunction red, + mpi::DataType dtype, + mpi::OpType op); } // namespace engine } // namespace rabit #endif // RABIT_ENGINE_H diff --git a/src/engine_mpi.cc b/src/engine_mpi.cc new file mode 100644 index 000000000..c2e2a572d --- /dev/null +++ b/src/engine_mpi.cc @@ -0,0 +1,115 @@ +/*! 
+ * \file engine_mpi.cc + * \brief this file gives an implementation of engine interface using MPI, + * this will allow rabit program to run with MPI, but do not comes with fault tolerant + * + * \author Tianqi Chen + */ +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE +#define NOMINMAX +#include "./engine.h" +#include "./utils.h" +#include + +namespace rabit { +namespace engine { +/*! \brief implementation of engine using MPI */ +class MPIEngine : public IEngine { + public: + MPIEngine(void) { + version_number = 0; + } + virtual void AllReduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer) { + utils::Error("MPIEngine:: AllReduce is not supported, use AllReduce_ instead"); + } + virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) { + MPI::COMM_WORLD.Bcast(sendrecvbuf_, size, MPI::CHAR, root); + } + virtual void InitAfterException(void) { + utils::Error("MPI is not fault tolerant"); + } + virtual int LoadCheckPoint(utils::ISerializable *p_model) { + return 0; + } + virtual void CheckPoint(const utils::ISerializable &model) { + version_number += 1; + } + virtual int VersionNumber(void) const { + return version_number; + } + /*! \brief get rank of current node */ + virtual int GetRank(void) const { + return MPI::COMM_WORLD.Get_rank(); + } + /*! \brief get total number of */ + virtual int GetWorldSize(void) const { + return MPI::COMM_WORLD.Get_size(); + } + /*! \brief get the host name of current node */ + virtual std::string GetHost(void) const { + int len; + char name[MPI_MAX_PROCESSOR_NAME]; + MPI::Get_processor_name(name, len); + name[len] = '\0'; + return std::string(name); + } + + private: + int version_number; +}; + +// singleton sync manager +MPIEngine manager; + +/*! \brief intiialize the synchronization module */ +void Init(int argc, char *argv[]) { + MPI::Init(argc, argv); +} +/*! \brief finalize syncrhonization module */ +void Finalize(void) { + MPI::Finalize(); +} + +/*! 
\brief singleton method to get engine */ +IEngine *GetEngine(void) { + return &manager; +} +// transform enum to MPI data type +inline MPI::Datatype GetType(mpi::DataType dtype) { + using namespace mpi; + switch(dtype) { + case kInt: return MPI::INT; + case kUInt: return MPI::UNSIGNED; + case kFloat: return MPI::FLOAT; + case kDouble: return MPI::DOUBLE; + } + utils::Error("unknown mpi::DataType"); + return MPI::CHAR; +} +// transform enum to MPI OP +inline MPI::Op GetOp(mpi::OpType otype) { + using namespace mpi; + switch(otype) { + case kMax: return MPI::MAX; + case kMin: return MPI::MIN; + case kSum: return MPI::SUM; + case kBitwiseOR: return MPI::BOR; + } + utils::Error("unknown mpi::OpType"); + return MPI::MAX; +} +// perform in-place allreduce, on sendrecvbuf +void AllReduce_(void *sendrecvbuf, + size_t type_nbytes, + size_t count, + IEngine::ReduceFunction red, + mpi::DataType dtype, + mpi::OpType op) { + MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, GetType(dtype), GetOp(op)); +} +} // namespace engine +} // namespace rabit diff --git a/src/rabit-inl.h b/src/rabit-inl.h new file mode 100644 index 000000000..bc3c4a4fb --- /dev/null +++ b/src/rabit-inl.h @@ -0,0 +1,123 @@ +/*! 
+ * \file rabit-inl.h + * \brief implementation of inline template function for rabit interface + * + * \author Tianqi Chen + */ +#ifndef RABIT_RABIT_INL_H +#define RABIT_RABIT_INL_H + +namespace rabit { +namespace engine { +namespace mpi { +// template function to translate type to enum indicator +template +inline DataType GetType(void); +template<> +inline DataType GetType(void) { + return kInt; +} +template<> +inline DataType GetType(void) { + return kUInt; +} +template<> +inline DataType GetType(void) { + return kFloat; +} +template<> +inline DataType GetType(void) { + return kDouble; +} +} // namespace mpi +} // namespace engine + +namespace op { +struct Max { + const static engine::mpi::OpType kType = engine::mpi::kMax; + template + inline static void Reduce(DType &dst, const DType &src) { + if (dst < src) dst = src; + } +}; +struct Min { + const static engine::mpi::OpType kType = engine::mpi::kMin; + template + inline static void Reduce(DType &dst, const DType &src) { + if (dst > src) dst = src; + } +}; +struct Sum { + const static engine::mpi::OpType kType = engine::mpi::kSum; + template + inline static void Reduce(DType &dst, const DType &src) { + dst += src; + } +}; +struct BitOR { + const static engine::mpi::OpType kType = engine::mpi::kBitwiseOR; + template + inline static void Reduce(DType &dst, const DType &src) { + dst |= src; + } +}; +template +inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) { + const DType *src = (const DType*)src_; + DType *dst = (DType*)dst_; + for (int i = 0; i < len; ++i) { + OP::Reduce(dst[i], src[i]); + } +} +} // namespace op + +// intialize the rabit engine +inline void Init(int argc, char *argv[]) { + engine::Init(argc, argv); +} +// finalize the rabit engine +inline void Finalize(void) { + engine::Finalize(); +} +// get the rank of current process +inline int GetRank(void) { + return engine::GetEngine()->GetRank(); +} +// the the size of the world +inline int GetWorldSize(void) { + 
return engine::GetEngine()->GetWorldSize(); +} +// get the name of current processor +inline std::string GetProcessorName(void) { + return engine::GetEngine()->GetHost(); +} +// broadcast an std::string to all others from root +inline void Bcast(std::string *sendrecv_data, int root) { + engine::IEngine *e = engine::GetEngine(); + unsigned len = static_cast(sendrecv_data->length()); + e->Broadcast(&len, sizeof(len), root); + sendrecv_data->resize(len); + if (len != 0) { + e->Broadcast(&(*sendrecv_data)[0], len, root); + } +} +// perform inplace AllReduce +template +inline void AllReduce(DType *sendrecvbuf, size_t count) { + engine::AllReduce_(sendrecvbuf, sizeof(DType), count, op::Reducer, + engine::mpi::GetType(), OP::kType); +} +// load latest check point +inline int LoadCheckPoint(utils::ISerializable *p_model) { + return engine::GetEngine()->LoadCheckPoint(p_model); +} +// checkpoint the model, meaning we finished a stage of execution +inline void CheckPoint(const utils::ISerializable &model) { + engine::GetEngine()->CheckPoint(model); +} +// return the version number of currently stored model +inline int VersionNumber(void) { + return engine::GetEngine()->VersionNumber(); +} +} // namespace rabit +#endif diff --git a/src/rabit.h b/src/rabit.h index 5659798ec..0260ee52b 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -2,8 +2,9 @@ #define RABIT_RABIT_H /*! * \file rabit.h - * \brief This file defines a template wrapper of engine to give more flexible - * AllReduce operations + * \brief This file defines unified AllReduce/Broadcast interface of rabit + * The actual implementation is redirected to rabit engine + * Code only using this header can also compiled with MPI AllReduce(with no fault recovery), * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ @@ -13,53 +14,32 @@ namespace rabit { /*! 
\brief namespace of operator */ namespace op { -struct Max { - template - inline static void Reduce(DType &dst, const DType &src) { - if (dst < src) dst = src; - } -}; -struct Sum { - template - inline static void Reduce(DType &dst, const DType &src) { - dst += src; - } -}; -struct BitOR { - template - inline static void Reduce(DType &dst, const DType &src) { - dst |= src; - } -}; -template -inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) { - const DType *src = (const DType*)src_; - DType *dst = (DType*)dst_; - for (int i = 0; i < len; ++i) { - OP::Reduce(dst[i], src[i]); - } -} +/*! \brief maximum value */ +struct Max; +/*! \brief minimum value */ +struct Min; +/*! \brief perform sum */ +struct Sum; +/*! \brief perform bitwise OR */ +struct BitOR; } // namespace op -void Init(int argc, char *argv[]) { - engine::Init(argc, argv); -} -void Finalize(void) { - engine::Finalize(); -} - +/*! + * \brief intialize the rabit module, call this once function before using anything + * \param argc number of arguments in argv + * \param argv the array of input arguments + */ +inline void Init(int argc, char *argv[]); +/*! + * \brief finalize the rabit engine, call this function after you finished all jobs + */ +inline void Finalize(void); /*! \brief get rank of current process */ -inline int GetRank(void) { - return engine::GetEngine()->GetRank(); -} +inline int GetRank(void); /*! \brief get total number of process */ -int GetWorldSize(void) { - return engine::GetEngine()->GetWorldSize(); -} +inline int GetWorldSize(void); /*! \brief get name of processor */ -std::string GetProcessorName(void) { - return engine::GetEngine()->GetHost(); -} +inline std::string GetProcessorName(void); /*! 
* \brief broadcast an std::string to all others from root * \param sendrecv_data the pointer to send or recive buffer, @@ -67,15 +47,7 @@ std::string GetProcessorName(void) { * and string will be resized to correct length * \param root the root of process */ -inline void Bcast(std::string *sendrecv_data, int root) { - engine::IEngine *e = engine::GetEngine(); - unsigned len = static_cast(sendrecv_data->length()); - e->Broadcast(&len, sizeof(len), root); - sendrecv_data->resize(len); - if (len != 0) { - e->Broadcast(&(*sendrecv_data)[0], len, root); - } -} +inline void Bcast(std::string *sendrecv_data, int root); /*! * \brief perform in-place allreduce, on sendrecvbuf * this function is NOT thread-safe @@ -90,9 +62,7 @@ inline void Bcast(std::string *sendrecv_data, int root) { * \tparam DType type of data */ template -inline void AllReduce(DType *sendrecvbuf, size_t count) { - engine::GetEngine()->AllReduce(sendrecvbuf, sizeof(DType), count, op::Reducer); -} +inline void AllReduce(DType *sendrecvbuf, size_t count); /*! * \brief load latest check point * \param p_model pointer to the model @@ -110,9 +80,7 @@ inline void AllReduce(DType *sendrecvbuf, size_t count) { * * \sa CheckPoint, VersionNumber */ -inline int LoadCheckPoint(utils::ISerializable *p_model) { - return engine::GetEngine()->LoadCheckPoint(p_model); -} +inline int LoadCheckPoint(utils::ISerializable *p_model); /*! * \brief checkpoint the model, meaning we finished a stage of execution * every time we call check point, there is a version number which will increase by one @@ -120,16 +88,14 @@ inline int LoadCheckPoint(utils::ISerializable *p_model) { * \param p_model pointer to the model * \sa LoadCheckPoint, VersionNumber */ -inline void CheckPoint(const utils::ISerializable &model) { - engine::GetEngine()->CheckPoint(model); -} +inline void CheckPoint(const utils::ISerializable &model); /*! 
* \return version number of current stored model, * which means how many calls to CheckPoint we made so far * \sa LoadCheckPoint, CheckPoint */ -inline int VersionNumber(void) { - return engine::GetEngine()->VersionNumber(); -} +inline int VersionNumber(void); } // namespace rabit +// implementation of template functions +#include "./rabit-inl.h" #endif // RABIT_ALLREDUCE_H diff --git a/test/.gitignore b/test/.gitignore new file mode 100644 index 000000000..851969b1e --- /dev/null +++ b/test/.gitignore @@ -0,0 +1,2 @@ +*.mpi +test_* diff --git a/test/Makefile b/test/Makefile index a48fcd77c..bd14fff97 100644 --- a/test/Makefile +++ b/test/Makefile @@ -4,26 +4,31 @@ export MPICXX = mpicxx export LDFLAGS= -pthread -lm export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src -ifeq ($(no_omp),1) - CFLAGS += -DDISABLE_OPENMP -else - CFLAGS += -fopenmp -endif - # specify tensor path BIN = test_allreduce test_recover test_model_recover -OBJ = engine_base.o engine_robust.o engine.o +# objectives that makes up rabit library +RABIT_OBJ = allreduce_base.o allreduce_robust.o engine.o +MPIOBJ = engine_mpi.o + +OBJ = $(RABIT_OBJ) test_allreduce.o test_recover.o test_model_recover.o +MPIBIN = test_allreduce.mpi .PHONY: clean all all: $(BIN) $(MPIBIN) -engine_tcp.o: ../src/engine_tcp.cpp ../src/*.h -engine_base.o: ../src/engine_base.cc ../src/*.h +allreduce_base.o: ../src/allreduce_base.cc ../src/*.h engine.o: ../src/engine.cc ../src/*.h -engine_robust.o: ../src/engine_robust.cc ../src/*.h -test_allreduce: test_allreduce.cpp ../src/*.h $(OBJ) -test_recover: test_recover.cpp ../src/*.h $(OBJ) -test_model_recover: test_model_recover.cpp ../src/*.h $(OBJ) +allreduce_robust.o: ../src/allreduce_robust.cc ../src/*.h +engine_mpi.o: ../src/engine_mpi.cc +test_allreduce.o: test_allreduce.cpp ../src/*.h +test_recover.o: test_recover.cpp ../src/*.h +test_model_recover.o: test_model_recover.cpp ../src/*.h + +# we can link against MPI version to get use MPI +test_allreduce: 
test_allreduce.o $(RABIT_OBJ) +test_allreduce.mpi: test_allreduce.o $(MPIOBJ) +test_recover: test_recover.o $(RABIT_OBJ) +test_model_recover: test_model_recover.o $(RABIT_OBJ) $(BIN) : $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) @@ -32,7 +37,10 @@ $(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) $(MPIBIN) : - $(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) + $(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) + +$(MPIOBJ) : + $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) clean: $(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~ diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index 7f9ad9f78..e0fc9843f 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -79,7 +79,8 @@ int main(int argc, char *argv[]) { utils::LogPrintf("[%d] !!!TestMax pass\n", rank); TestSum(mock, n); utils::LogPrintf("[%d] !!!TestSum pass\n", rank); - for (int i = 0; i < nproc; i += nproc / 3) { + int step = std::max(nproc / 3, 1); + for (int i = 0; i < nproc; i += step) { TestBcast(mock, n, i); } utils::LogPrintf("[%d] !!!TestBcast pass\n", rank); diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index a7f4d7677..c6d2973ce 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) { } break; } catch (MockException &e) { - //rabit::engine::GetEngine()->InitAfterException(); + rabit::engine::GetEngine()->InitAfterException(); ++ntrial; } } diff --git a/test/test_recover.cpp b/test/test_recover.cpp index 761226889..9dfc7f60a 100644 --- a/test/test_recover.cpp +++ b/test/test_recover.cpp @@ -115,7 +115,7 @@ int main(int argc, char *argv[]) { // reach here break; } catch (MockException &e) { - //rabit::engine::GetEngine()->InitAfterException(); + rabit::engine::GetEngine()->InitAfterException(); ++ntrial; } } From e4abca949494ea6186d2b6b64ba58b7853eaa35a Mon Sep 17 
00:00:00 2001 From: nachocano Date: Tue, 2 Dec 2014 11:28:20 -0800 Subject: [PATCH 045/531] changing report folder to doc --- {report => doc}/.gitignore | 0 {report => doc}/fig/allreduce.pdf | Bin {report => doc}/rabit.bib | 0 {report => doc}/rabit.tex | 15 ++++++++++----- 4 files changed, 10 insertions(+), 5 deletions(-) rename {report => doc}/.gitignore (100%) rename {report => doc}/fig/allreduce.pdf (100%) rename {report => doc}/rabit.bib (100%) rename {report => doc}/rabit.tex (95%) diff --git a/report/.gitignore b/doc/.gitignore similarity index 100% rename from report/.gitignore rename to doc/.gitignore diff --git a/report/fig/allreduce.pdf b/doc/fig/allreduce.pdf similarity index 100% rename from report/fig/allreduce.pdf rename to doc/fig/allreduce.pdf diff --git a/report/rabit.bib b/doc/rabit.bib similarity index 100% rename from report/rabit.bib rename to doc/rabit.bib diff --git a/report/rabit.tex b/doc/rabit.tex similarity index 95% rename from report/rabit.tex rename to doc/rabit.tex index 8dd4b4508..e2c96d53f 100644 --- a/report/rabit.tex +++ b/doc/rabit.tex @@ -30,14 +30,14 @@ In this work, we propose RABIT, an AllReduce library suitable for distributed ma \section{Introduction} Distributed machine learning is an active research area that has seen an incredible grow in recent years. Several approaches have been proposed, e.g. parameter server abstraction, graph approaches, among others \cite{paramServer,DuchiAW12,Zinkevich,Dekel,Low}. The closest example to our work is proposed by Agarwal et al. \cite{Agarwal}, in which they have a tree-shape communication infrastructure that efficiently accumulates and broadcasts values to every node involved in a computation. - - +\todo{add more} \section{AllReduce} In AllReduce settings, nodes are organized in a tree structure. Each node holds a portion of the data and computes some values on it. 
Those values are passed up the tree and aggregated, until a global aggregate value is calculated in the root node (reduce). The global value is then passed down to all other nodes (broadcast). Figure \ref{allreduce} shows an example of an AllReduce sum operation. The leaf nodes passed data to their parents (interior nodes). Such interior nodes compute an intermediate aggregate and pass the value to the root, which in turn computes the final aggregate and then passes back the result to every node in the cluster. +\todo{add more} \begin{figure}[tb] \centering @@ -55,7 +55,7 @@ Figure \ref{allreduce} shows an example of an AllReduce sum operation. The leaf The design of RABIT was motivated by the following needs: \begin{enumerate} - \item \emph{Distributed}: machine learning algorithms are inherently iterative and computation intensive. Given the vast amount of data they can work on, it may be intractable to perform all the processing on a single machine. Instead, we want to divide the computation into different nodes, each one would be in charge of computing statistics on some portion of the data, and then a combination step would take place, where all those independent local solutions will be aggregated into a single result. + \item \emph{Distributed}: machine learning algorithms are inherently iterative and computation intensive. Given the vast amount of data they can work on, it may be intractable to perform all the processing on a single machine. Instead, we want to divide the computation into different nodes, each one would be in charge of computing statistics on some portion of the data, and then have a combination step, where all those independent local solutions will be aggregated into a single result. \item \emph{Scalability}: we want our solution to handle a growing amount of work in a capable manner, i.e. we should be able to accommodate to data and computation growth by adding more nodes. 
\item \emph{Fault Tolerance}: we assume an environment where failures happen, either machines can go down or communication failures occur. Given the computation intensive nature of machine learning problems, we want to be able to continue operating properly in the event of a failure, instead of starting the process all over again. \item \emph{Programmability}: we want to provide a clean interface that can be easily used by programmers. With few lines of code, they should be able to have a fault-tolerant AllReduce implementation. @@ -64,9 +64,14 @@ The design of RABIT was motivated by the following needs: \item \emph{Footprint}: we want to have a low memory footprint while running as well as provide a lightweight footprint library. \end{enumerate} -\subsection{Interface} +\subsection{Proposed Solution} -\todo{add sync module interface, example of how to use the library} +\todo{what we did} + + +\subsubsection{Interface} + +\todo{API, how to use it} \section{Evaluation} From ed1de6df80a426dc82250a99d32ae3e9f0d73776 Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 2 Dec 2014 21:11:48 -0800 Subject: [PATCH 046/531] change AllReduce to Allreduce --- src/allreduce_base.cc | 30 +++++++++++------------ src/allreduce_base.h | 28 +++++++++++----------- src/allreduce_robust-inl.h | 6 ++--- src/allreduce_robust.cc | 48 ++++++++++++++++++------------------- src/allreduce_robust.h | 14 +++++------ src/engine.cc | 6 ++--- src/engine.h | 6 ++--- src/engine_mpi.cc | 6 ++--- src/mock.h | 4 ++-- src/rabit-inl.h | 6 ++--- src/rabit.h | 8 +++---- src/tcp_master.py | 2 +- test/test_allreduce.cpp | 4 ++-- test/test_model_recover.cpp | 4 ++-- test/test_recover.cpp | 4 ++-- 15 files changed, 88 insertions(+), 88 deletions(-) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index ca63f5c1c..0cfb9fd34 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -13,7 +13,7 @@ namespace rabit { namespace engine { // constructor -AllReduceBase::AllReduceBase(void) { 
+AllreduceBase::AllreduceBase(void) { master_uri = "NULL"; master_port = 9000; host_uri = ""; @@ -26,7 +26,7 @@ AllReduceBase::AllReduceBase(void) { } // initialization function -void AllReduceBase::Init(void) { +void AllreduceBase::Init(void) { utils::Socket::Startup(); // single node mode if (master_uri == "NULL") return; @@ -68,7 +68,7 @@ void AllReduceBase::Init(void) { utils::Assert(master.RecvAll(&hname[0], len) == static_cast(len), "sync::Init failure 10"); utils::Assert(master.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "sync::Init failure 11"); links[0].sock.Create(); - links[0].sock.Connect(utils::SockAddr(hname.c_str(), hport)); + links[0].sock.Connect(utils::SockAddr(hname.c_str(), hport)); utils::Assert(links[0].sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 12"); utils::Assert(links[0].sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 13"); utils::Check(magic == kMagic, "sync::Init failure, parent magic number mismatch"); @@ -105,7 +105,7 @@ void AllReduceBase::Init(void) { // done } -void AllReduceBase::Shutdown(void) { +void AllreduceBase::Shutdown(void) { for (size_t i = 0; i < links.size(); ++i) { links[i].sock.Close(); } @@ -117,7 +117,7 @@ void AllReduceBase::Shutdown(void) { * \param name parameter name * \param val parameter value */ -void AllReduceBase::SetParam(const char *name, const char *val) { +void AllreduceBase::SetParam(const char *name, const char *val) { if (!strcmp(name, "master_uri")) master_uri = val; if (!strcmp(name, "master_port")) master_port = atoi(val); if (!strcmp(name, "reduce_buffer")) { @@ -140,10 +140,10 @@ void AllReduceBase::SetParam(const char *name, const char *val) { /*! * \brief perform in-place allreduce, on sendrecvbuf, this function can fail, and will return the cause of failure * - * NOTE on AllReduce: - * The kSuccess TryAllReduce does NOT mean every node have successfully finishes TryAllReduce. 
- * It only means the current node get the correct result of AllReduce. - * However, it means every node finishes LAST call(instead of this one) of AllReduce/Bcast + * NOTE on Allreduce: + * The kSuccess TryAllreduce does NOT mean every node have successfully finishes TryAllreduce. + * It only means the current node get the correct result of Allreduce. + * However, it means every node finishes LAST call(instead of this one) of Allreduce/Bcast * * \param sendrecvbuf_ buffer for both sending and recving data * \param type_nbytes the unit number of bytes the type have @@ -152,8 +152,8 @@ void AllReduceBase::SetParam(const char *name, const char *val) { * \return this function can return kSuccess, kSockError, kGetExcept, see ReturnType for details * \sa ReturnType */ -AllReduceBase::ReturnType -AllReduceBase::TryAllReduce(void *sendrecvbuf_, +AllreduceBase::ReturnType +AllreduceBase::TryAllreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) { @@ -248,7 +248,7 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, size_t start = size_up_reduce % buffer_size; // peform read till end of buffer size_t nread = std::min(buffer_size - start, max_reduce - size_up_reduce); - utils::Assert(nread % type_nbytes == 0, "AllReduce: size check"); + utils::Assert(nread % type_nbytes == 0, "Allreduce: size check"); for (int i = 0; i < nlink; ++i) { if (i != parent_index) { reducer(links[i].buffer_head + start, @@ -280,7 +280,7 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, } if (len != -1) { size_down_in += static_cast(len); - utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error"); + utils::Assert(size_down_in <= size_up_out, "Allreduce: boundary error"); } else { if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; } @@ -306,8 +306,8 @@ AllReduceBase::TryAllReduce(void *sendrecvbuf_, * \return this function can return kSuccess, kSockError, kGetExcept, see ReturnType for details * \sa ReturnType */ -AllReduceBase::ReturnType 
-AllReduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { +AllreduceBase::ReturnType +AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { if (links.size() == 0 || total_size == 0) return kSuccess; utils::Check(root < world_size, "Broadcast: root should be smaller than world size"); // number of links diff --git a/src/allreduce_base.h b/src/allreduce_base.h index 5ddf27635..578b941f1 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -27,14 +27,14 @@ class Datatype { } namespace rabit { namespace engine { -/*! \brief implementation of basic AllReduce engine */ -class AllReduceBase : public IEngine { +/*! \brief implementation of basic Allreduce engine */ +class AllreduceBase : public IEngine { public: // magic number to verify server const static int kMagic = 0xff99; // constant one byte out of band message to indicate error happening - AllReduceBase(void); - virtual ~AllReduceBase(void) {} + AllreduceBase(void); + virtual ~AllreduceBase(void) {} // initialize the manager void Init(void); // shutdown the engine @@ -65,12 +65,12 @@ class AllReduceBase : public IEngine { * \param count number of elements to be reduced * \param reducer reduce function */ - virtual void AllReduce(void *sendrecvbuf_, + virtual void Allreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) { - utils::Assert(TryAllReduce(sendrecvbuf_, type_nbytes, count, reducer) == kSuccess, - "AllReduce failed"); + utils::Assert(TryAllreduce(sendrecvbuf_, type_nbytes, count, reducer) == kSuccess, + "Allreduce failed"); } /*! * \brief broadcast data from root to all nodes @@ -80,7 +80,7 @@ class AllReduceBase : public IEngine { */ virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { utils::Assert(TryBroadcast(sendrecvbuf_, total_size, root) == kSuccess, - "AllReduce failed"); + "Allreduce failed"); } /*! 
* \brief load latest check point @@ -171,7 +171,7 @@ class AllReduceBase : public IEngine { */ inline bool ReadToRingBuffer(size_t protect_start) { size_t ngap = size_read - protect_start; - utils::Assert(ngap <= buffer_size, "AllReduce: boundary check"); + utils::Assert(ngap <= buffer_size, "Allreduce: boundary check"); size_t offset = size_read % buffer_size; size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); if (nmax == 0) return true; @@ -225,10 +225,10 @@ class AllReduceBase : public IEngine { /*! * \brief perform in-place allreduce, on sendrecvbuf, this function can fail, and will return the cause of failure * - * NOTE on AllReduce: - * The kSuccess TryAllReduce does NOT mean every node have successfully finishes TryAllReduce. - * It only means the current node get the correct result of AllReduce. - * However, it means every node finishes LAST call(instead of this one) of AllReduce/Bcast + * NOTE on Allreduce: + * The kSuccess TryAllreduce does NOT mean every node have successfully finishes TryAllreduce. + * It only means the current node get the correct result of Allreduce. + * However, it means every node finishes LAST call(instead of this one) of Allreduce/Bcast * * \param sendrecvbuf_ buffer for both sending and recving data * \param type_nbytes the unit number of bytes the type have @@ -237,7 +237,7 @@ class AllReduceBase : public IEngine { * \return this function can return kSuccess, kSockError, kGetExcept, see ReturnType for details * \sa ReturnType */ - ReturnType TryAllReduce(void *sendrecvbuf_, + ReturnType TryAllreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer); diff --git a/src/allreduce_robust-inl.h b/src/allreduce_robust-inl.h index cc9943282..f1f557593 100644 --- a/src/allreduce_robust-inl.h +++ b/src/allreduce_robust-inl.h @@ -1,6 +1,6 @@ /*! 
* \file allreduce_robust-inl.h - * \brief implementation of inline template function in AllReduceRobust + * \brief implementation of inline template function in AllreduceRobust * * \author Tianqi Chen */ @@ -29,8 +29,8 @@ namespace engine { * \tparam NodeType type of node value */ template -inline AllReduceRobust::ReturnType -AllReduceRobust::MsgPassing(const NodeType &node_value, +inline AllreduceRobust::ReturnType +AllreduceRobust::MsgPassing(const NodeType &node_value, std::vector *p_edge_in, std::vector *p_edge_out, EdgeType (*func) (const NodeType &node_value, diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 83b6a5fc8..d2339a3be 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -1,6 +1,6 @@ /*! * \file allreduce_robust.cc - * \brief Robust implementation of AllReduce + * \brief Robust implementation of Allreduce * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ @@ -15,12 +15,12 @@ namespace rabit { namespace engine { -AllReduceRobust::AllReduceRobust(void) { +AllreduceRobust::AllreduceRobust(void) { result_buffer_round = 1; seq_counter = 0; } /*! \brief shutdown the engine */ -void AllReduceRobust::Shutdown(void) { +void AllreduceRobust::Shutdown(void) { // need to sync the exec before we shutdown, do a pesudo check point // execute checkpoint, note: when checkpoint existing, load will not happen utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kMaxSeq), @@ -30,15 +30,15 @@ void AllReduceRobust::Shutdown(void) { // execute check ack step, load happens here utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kMaxSeq), "check ack must return true"); - AllReduceBase::Shutdown(); + AllreduceBase::Shutdown(); } /*! 
* \brief set parameters to the engine * \param name parameter name * \param val parameter value */ -void AllReduceRobust::SetParam(const char *name, const char *val) { - AllReduceBase::SetParam(name, val); +void AllreduceRobust::SetParam(const char *name, const char *val) { + AllreduceBase::SetParam(name, val); if (!strcmp(name, "result_buffer_round")) result_buffer_round = atoi(val); if (!strcmp(name, "result_replicate")) { result_buffer_round = std::max(world_size / atoi(val), 1); @@ -52,7 +52,7 @@ void AllReduceRobust::SetParam(const char *name, const char *val) { * \param count number of elements to be reduced * \param reducer reduce function */ -void AllReduceRobust::AllReduce(void *sendrecvbuf_, +void AllreduceRobust::Allreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) { @@ -68,7 +68,7 @@ void AllReduceRobust::AllReduce(void *sendrecvbuf_, std::memcpy(temp, sendrecvbuf_, type_nbytes * count); break; } else { std::memcpy(temp, sendrecvbuf_, type_nbytes * count); - if (CheckAndRecover(TryAllReduce(temp, type_nbytes, count, reducer))) { + if (CheckAndRecover(TryAllreduce(temp, type_nbytes, count, reducer))) { std::memcpy(sendrecvbuf_, temp, type_nbytes * count); break; } else { recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter); @@ -84,7 +84,7 @@ void AllReduceRobust::AllReduce(void *sendrecvbuf_, * \param size the size of the data to be broadcasted * \param root the root worker id to broadcast the data */ -void AllReduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) { +void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) { bool recovered = RecoverExec(sendrecvbuf_, total_size, 0, seq_counter); // now we are free to remove the last result, if any if (resbuf.LastSeqNo() != -1 && @@ -114,7 +114,7 @@ void AllReduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) * the p_model is not touched, user should do necessary initialization by 
themselves * \sa CheckPoint, VersionNumber */ -int AllReduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { +int AllreduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { // check if we succesfll if (RecoverExec(NULL, 0, ActionSummary::kLoadCheck, ActionSummary::kMaxSeq)) { // reset result buffer @@ -142,7 +142,7 @@ int AllReduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { * \param p_model pointer to the model * \sa LoadCheckPoint, VersionNumber */ -void AllReduceRobust::CheckPoint(const utils::ISerializable &model) { +void AllreduceRobust::CheckPoint(const utils::ISerializable &model) { // increase version number version_number += 1; // save model @@ -168,7 +168,7 @@ void AllReduceRobust::CheckPoint(const utils::ISerializable &model) { * when kSockError is returned, it simply means there are bad sockets in the links, * and some link recovery proceduer is needed */ -AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { +AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { // number of links const int nlink = static_cast(links.size()); for (int i = 0; i < nlink; ++i) { @@ -285,7 +285,7 @@ AllReduceRobust::ReturnType AllReduceRobust::TryResetLinks(void) { * \brief try to reconnect the broken links * \return this function can kSuccess or kSockError */ -AllReduceRobust::ReturnType AllReduceRobust::TryReConnectLinks(void) { +AllreduceRobust::ReturnType AllreduceRobust::TryReConnectLinks(void) { utils::Error("TryReConnectLinks: not implemented"); return kSuccess; } @@ -296,7 +296,7 @@ AllReduceRobust::ReturnType AllReduceRobust::TryReConnectLinks(void) { * \param err_type the type of error happening in the system * \return true if err_type is kSuccess, false otherwise */ -bool AllReduceRobust::CheckAndRecover(ReturnType err_type) { +bool AllreduceRobust::CheckAndRecover(ReturnType err_type) { if (err_type == kSuccess) return true; while(err_type != kSuccess) { switch(err_type) { @@ -383,8 +383,8 @@ inline char 
DataRequest(const std::pair &node_value, * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details * \sa ReturnType */ -AllReduceRobust::ReturnType -AllReduceRobust::TryDecideRouting(AllReduceRobust::RecoverType role, +AllreduceRobust::ReturnType +AllreduceRobust::TryDecideRouting(AllreduceRobust::RecoverType role, size_t *p_size, int *p_recvlink, std::vector *p_req_in) { @@ -398,7 +398,7 @@ AllReduceRobust::TryDecideRouting(AllReduceRobust::RecoverType role, for (size_t i = 0; i < dist_in.size(); ++i) { if (dist_in[i].first != std::numeric_limits::max()) { utils::Check(best_link == -2 || *p_size == dist_in[i].second, - "[%d] AllReduce size inconsistent, distin=%lu, size=%lu, reporting=%lu\n", + "[%d] Allreduce size inconsistent, distin=%lu, size=%lu, reporting=%lu\n", rank, dist_in[i].first, *p_size, dist_in[i].second); if (best_link == -2 || dist_in[i].first < dist_in[best_link].first) { best_link = static_cast(i); @@ -444,8 +444,8 @@ AllReduceRobust::TryDecideRouting(AllReduceRobust::RecoverType role, * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details * \sa ReturnType, TryDecideRouting */ -AllReduceRobust::ReturnType -AllReduceRobust::TryRecoverData(RecoverType role, +AllreduceRobust::ReturnType +AllreduceRobust::TryRecoverData(RecoverType role, void *sendrecvbuf_, size_t size, int recv_link, @@ -546,7 +546,7 @@ AllReduceRobust::TryRecoverData(RecoverType role, * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details * \sa ReturnType */ -AllReduceRobust::ReturnType AllReduceRobust::TryLoadCheckPoint(bool requester) { +AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) { RecoverType role = requester ? 
kRequestData : kHaveData; size_t size = this->checked_model.length(); int recv_link; @@ -573,8 +573,8 @@ AllReduceRobust::ReturnType AllReduceRobust::TryLoadCheckPoint(bool requester) { * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details * \sa ReturnType */ -AllReduceRobust::ReturnType -AllReduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { RecoverType role; +AllreduceRobust::ReturnType +AllreduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { RecoverType role; if (!requester) { sendrecvbuf = resbuf.Query(seqno, &size); role = sendrecvbuf != NULL ? kHaveData : kPassData; @@ -605,7 +605,7 @@ AllReduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool re * result by recovering procedure, the action is complete, no further action is needed * - false means this is the lastest action that has not yet been executed, need to execute the action */ -bool AllReduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { +bool AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { if (flag != 0) { utils::Assert(seqno == ActionSummary::kMaxSeq, "must only set seqno for normal operations"); } @@ -615,7 +615,7 @@ bool AllReduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { // action ActionSummary act = req; // get the reduced action - if (!CheckAndRecover(TryAllReduce(&act, sizeof(act), 1, ActionSummary::Reducer))) continue; + if (!CheckAndRecover(TryAllreduce(&act, sizeof(act), 1, ActionSummary::Reducer))) continue; if (act.check_ack()) { if (act.check_point()) { // if we also have check_point, do check point first diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index d9eee6d25..26e45f16c 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -1,6 +1,6 @@ /*! 
* \file allreduce_robust.h - * \brief Robust implementation of AllReduce + * \brief Robust implementation of Allreduce * using TCP non-block socket and tree-shape reduction. * * This implementation considers the failure of nodes @@ -16,10 +16,10 @@ namespace rabit { namespace engine { /*! \brief implementation of fault tolerant all reduce engine */ -class AllReduceRobust : public AllReduceBase { +class AllreduceRobust : public AllreduceBase { public: - AllReduceRobust(void); - virtual ~AllReduceRobust(void) {} + AllreduceRobust(void); + virtual ~AllreduceRobust(void) {} /*! \brief shutdown the engine */ virtual void Shutdown(void); /*! @@ -36,7 +36,7 @@ class AllReduceRobust : public AllReduceBase { * \param count number of elements to be reduced * \param reducer reduce function */ - virtual void AllReduce(void *sendrecvbuf_, + virtual void Allreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer); @@ -142,7 +142,7 @@ class AllReduceRobust : public AllReduceBase { inline int flag(void) const { return seqcode & 15; } - // reducer for AllReduce, used to get the result ActionSummary from all nodes + // reducer for Allreduce, used to get the result ActionSummary from all nodes inline static void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) { const ActionSummary *src = (const ActionSummary*)src_; ActionSummary *dst = (ActionSummary*)dst_; @@ -162,7 +162,7 @@ class AllReduceRobust : public AllReduceBase { // internel sequence code int seqcode; }; - /*! \brief data structure to remember result of Bcast and AllReduce calls */ + /*! \brief data structure to remember result of Bcast and Allreduce calls */ class ResultBuffer { public: // constructor diff --git a/src/engine.cc b/src/engine.cc index 24ab1e588..0512ac503 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -16,7 +16,7 @@ namespace rabit { namespace engine { // singleton sync manager -AllReduceRobust manager; +AllreduceRobust manager; /*! 
\brief intiialize the synchronization module */ void Init(int argc, char *argv[]) { @@ -38,13 +38,13 @@ IEngine *GetEngine(void) { return &manager; } // perform in-place allreduce, on sendrecvbuf -void AllReduce_(void *sendrecvbuf, +void Allreduce_(void *sendrecvbuf, size_t type_nbytes, size_t count, IEngine::ReduceFunction red, mpi::DataType dtype, mpi::OpType op) { - GetEngine()->AllReduce(sendrecvbuf, type_nbytes, count, red); + GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red); } } // namespace engine } // namespace rabit diff --git a/src/engine.h b/src/engine.h index 873c02588..6d95fe5dc 100644 --- a/src/engine.h +++ b/src/engine.h @@ -16,7 +16,7 @@ class Datatype; namespace rabit { /*! \brief core interface of engine */ namespace engine { -/*! \brief interface of core AllReduce engine */ +/*! \brief interface of core Allreduce engine */ class IEngine { public: /*! @@ -41,7 +41,7 @@ class IEngine { * \param count number of elements to be reduced * \param reducer reduce function */ - virtual void AllReduce(void *sendrecvbuf_, + virtual void Allreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) = 0; @@ -130,7 +130,7 @@ enum DataType { * \param dtype the data type * \param op the reduce operator type */ -void AllReduce_(void *sendrecvbuf, +void Allreduce_(void *sendrecvbuf, size_t type_nbytes, size_t count, IEngine::ReduceFunction red, diff --git a/src/engine_mpi.cc b/src/engine_mpi.cc index c2e2a572d..03bd0cb73 100644 --- a/src/engine_mpi.cc +++ b/src/engine_mpi.cc @@ -20,11 +20,11 @@ class MPIEngine : public IEngine { MPIEngine(void) { version_number = 0; } - virtual void AllReduce(void *sendrecvbuf_, + virtual void Allreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) { - utils::Error("MPIEngine:: AllReduce is not supported, use AllReduce_ instead"); + utils::Error("MPIEngine:: Allreduce is not supported, use Allreduce_ instead"); } virtual void Broadcast(void *sendrecvbuf_, size_t 
size, int root) { MPI::COMM_WORLD.Bcast(sendrecvbuf_, size, MPI::CHAR, root); @@ -103,7 +103,7 @@ inline MPI::Op GetOp(mpi::OpType otype) { return MPI::MAX; } // perform in-place allreduce, on sendrecvbuf -void AllReduce_(void *sendrecvbuf, +void Allreduce_(void *sendrecvbuf, size_t type_nbytes, size_t count, IEngine::ReduceFunction red, diff --git a/src/mock.h b/src/mock.h index 1dd004c8b..5c85b841f 100644 --- a/src/mock.h +++ b/src/mock.h @@ -25,9 +25,9 @@ public: } template - inline void AllReduce(float *sendrecvbuf, size_t count) { + inline void Allreduce(float *sendrecvbuf, size_t count) { utils::Assert(verify(allReduce), "[%d] error when calling allReduce", rank); - rabit::AllReduce(sendrecvbuf, count); + rabit::Allreduce(sendrecvbuf, count); } inline bool LoadCheckPoint(utils::ISerializable *p_model) { diff --git a/src/rabit-inl.h b/src/rabit-inl.h index bc3c4a4fb..4ea741efe 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -101,10 +101,10 @@ inline void Bcast(std::string *sendrecv_data, int root) { e->Broadcast(&(*sendrecv_data)[0], len, root); } } -// perform inplace AllReduce +// perform inplace Allreduce template -inline void AllReduce(DType *sendrecvbuf, size_t count) { - engine::AllReduce_(sendrecvbuf, sizeof(DType), count, op::Reducer, +inline void Allreduce(DType *sendrecvbuf, size_t count) { + engine::Allreduce_(sendrecvbuf, sizeof(DType), count, op::Reducer, engine::mpi::GetType(), OP::kType); } // load latest check point diff --git a/src/rabit.h b/src/rabit.h index 0260ee52b..859b5488a 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -2,9 +2,9 @@ #define RABIT_RABIT_H /*! 
* \file rabit.h - * \brief This file defines unified AllReduce/Broadcast interface of rabit + * \brief This file defines unified Allreduce/Broadcast interface of rabit * The actual implementation is redirected to rabit engine - * Code only using this header can also compiled with MPI AllReduce(with no fault recovery), + * Code only using this header can also compiled with MPI Allreduce(with no fault recovery), * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ @@ -54,7 +54,7 @@ inline void Bcast(std::string *sendrecv_data, int root); * Example Usage: the following code gives sum of the result * vector data(10); * ... - * AllReduce(&data[0], data.size()); + * Allreduce(&data[0], data.size()); * ... * \param sendrecvbuf buffer for both sending and recving data * \param count number of elements to be reduced @@ -62,7 +62,7 @@ inline void Bcast(std::string *sendrecv_data, int root); * \tparam DType type of data */ template -inline void AllReduce(DType *sendrecvbuf, size_t count); +inline void Allreduce(DType *sendrecvbuf, size_t count); /*! 
* \brief load latest check point * \param p_model pointer to the model diff --git a/src/tcp_master.py b/src/tcp_master.py index c0820f14b..015b48784 100644 --- a/src/tcp_master.py +++ b/src/tcp_master.py @@ -68,7 +68,7 @@ class Master: try: magic = slave.recvint() if magic != kMagic: - print 'invalid magic number=%d from %s' % (magic, s_addr[0]) + print 'invalid magic number=%d from %s' % (magic, s_addr[0]) slave.sock.close() continue except socket.error: diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index e0fc9843f..625d9592a 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -15,7 +15,7 @@ inline void TestMax(test::Mock &mock, size_t n) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % 111; } - mock.AllReduce(&ndata[0], ndata.size()); + mock.Allreduce(&ndata[0], ndata.size()); for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % 111; for (int r = 0; r < nproc; ++r) { @@ -34,7 +34,7 @@ inline void TestSum(test::Mock &mock, size_t n) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z; } - mock.AllReduce(&ndata[0], ndata.size()); + mock.Allreduce(&ndata[0], ndata.size()); for (size_t i = 0; i < ndata.size(); ++i) { float rsum = 0.0f; for (int r = 0; r < nproc; ++r) { diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index c6d2973ce..c482c266c 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -39,7 +39,7 @@ inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + model->data[i]; } - mock.AllReduce(&ndata[0], ndata.size()); + mock.Allreduce(&ndata[0], ndata.size()); if (ntrial == iter && rank == 3) { throw MockException(); } @@ -62,7 +62,7 @@ inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + model->data[i]; } - 
mock.AllReduce(&ndata[0], ndata.size()); + mock.Allreduce(&ndata[0], ndata.size()); if (ntrial == iter && rank == 0) { throw MockException(); diff --git a/test/test_recover.cpp b/test/test_recover.cpp index 9dfc7f60a..92aa60918 100644 --- a/test/test_recover.cpp +++ b/test/test_recover.cpp @@ -18,7 +18,7 @@ inline void TestMax(test::Mock &mock, size_t n, int ntrial) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % 111; } - mock.AllReduce(&ndata[0], ndata.size()); + mock.Allreduce(&ndata[0], ndata.size()); if (ntrial == 0 && rank == 15) throw MockException(); for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % 111; @@ -38,7 +38,7 @@ inline void TestSum(test::Mock &mock, size_t n, int ntrial) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z; } - mock.AllReduce(&ndata[0], ndata.size()); + mock.Allreduce(&ndata[0], ndata.size()); if (ntrial == 0 && rank == 0) throw MockException(); From 56aad86231ea9a43171da4806fdddb6416ea04c9 Mon Sep 17 00:00:00 2001 From: nachocano Date: Wed, 3 Dec 2014 01:16:13 -0800 Subject: [PATCH 047/531] adding incomplete kmeans. 
I'm having a problem with the broadcast, and still need to implement the logic --- toolkit/Makefile | 42 +++++++++++++ toolkit/kmeans.cpp | 149 +++++++++++++++++++++++++++++++++++++++++++++ toolkit/kmeans.sh | 8 +++ 3 files changed, 199 insertions(+) create mode 100644 toolkit/Makefile create mode 100644 toolkit/kmeans.cpp create mode 100755 toolkit/kmeans.sh diff --git a/toolkit/Makefile b/toolkit/Makefile new file mode 100644 index 000000000..fc73737b7 --- /dev/null +++ b/toolkit/Makefile @@ -0,0 +1,42 @@ +export CC = gcc +export CXX = g++ +export MPICXX = mpicxx +export LDFLAGS= -pthread -lm +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src + +# specify tensor path +BIN = kmeans +# objectives that makes up rabit library +RABIT_OBJ = allreduce_base.o allreduce_robust.o engine.o +MPIOBJ = engine_mpi.o + +OBJ = $(RABIT_OBJ) kmeans.o +MPIBIN = kmeans.mpi +.PHONY: clean all + +all: $(BIN) $(MPIBIN) + +allreduce_base.o: ../src/allreduce_base.cc ../src/*.h +engine.o: ../src/engine.cc ../src/*.h +allreduce_robust.o: ../src/allreduce_robust.cc ../src/*.h +engine_mpi.o: ../src/engine_mpi.cc +kmeans.o: kmeans.cpp ../src/*.h + +# we can link against MPI version to get use MPI +kmeans: kmeans.o $(RABIT_OBJ) +kmeans.mpi: kmeans.o $(MPIOBJ) + +$(BIN) : + $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) + +$(OBJ) : + $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) + +$(MPIBIN) : + $(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) + +$(MPIOBJ) : + $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) + +clean: + $(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~ diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp new file mode 100644 index 000000000..ecd8b5a7a --- /dev/null +++ b/toolkit/kmeans.cpp @@ -0,0 +1,149 @@ +// this is a test case to test whether rabit can recover model when +// facing an exception +#include +#include +#include +#include +#include +#include +#include +#include + 
+using namespace rabit; + +class Model : public rabit::utils::ISerializable { + public: + std::vector data; + // load from stream + virtual void Load(rabit::utils::IStream &fi) { + fi.Read(&data); + } + /*! \brief save the model to the stream */ + virtual void Save(rabit::utils::IStream &fo) const { + fo.Write(data); + } + virtual void InitModel(int k, int d) { + data.resize(k * d + k, 0.0f); + } + +}; + +inline void KMeans(int ntrial, int iter, int k, int d, std::vector& data, Model *model) { + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + +/* const int z = iter + 111; + + std::vector ndata(model->data.size()); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % z + model->data[i]; + } + rabit::Allreduce(&ndata[0], ndata.size()); + if (ntrial == iter && rank == 3) { + //throw MockException(); + } + for (size_t i = 0; i < ndata.size(); ++i) { + float rmax = (i * 1) % z + model->data[i]; + for (int r = 0; r < nproc; ++r) { + rmax = std::max(rmax, (float)((i * (r+1)) % z) + model->data[i]); + } + utils::Check(rmax == ndata[i], "[%d] TestMax check failure\n", rank); + } + model->data = ndata; + +*/ +} + +inline void ReadData(char* data_dir, int d, std::vector* data) { + int rank = rabit::GetRank(); + std::stringstream ss; + ss << data_dir << rank; + const char* file = ss.str().c_str(); + std::ifstream ifs(file); + utils::Check(ifs.good(), "[%d] File %s does not exist\n", rank, file); + float v = 0.0f; + while(!ifs.eof()) { + ifs >> v; + data->push_back(v); + } + utils::Check(data->size() % d == 0, "[%d] Invalid data size. 
%d instead of %d\n", rank, data->size(), d); +} + +inline void InitCentroids(int k, int d, std::vector& data, Model* model) { + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + std::vector candidate_centroids(model->data.size() - k); + int elements = data.size() / d; + for (size_t i = 0; i < k; ++i) { + int index = rand() % elements; + int start = index * d; + int end = start + d; + int cstart = i * d; + //utils::LogPrintf("[%d] index=%d,start=%d\n", rank, index, start); + for (size_t j = start, l = cstart; j < end; ++j, ++l) { + candidate_centroids[l] = data[j]; + } + } + for (size_t i = 0; i < k; ++i) { + int proc = rand() % nproc; + //utils::LogPrintf("[%d] proc=%d\n", rank, proc); + std::string tmp_str; + int start = i * d; + if (proc == rank) { + std::ostringstream tmp; + for (size_t j = start, l = 0; l < d ; ++j, ++l) { + tmp << candidate_centroids[j]; + if (l != d-1) tmp << " "; + } + tmp_str = tmp.str(); + //utils::LogPrintf("[%d] centroid %s\n", rank, tmp_str.c_str()); + rabit::Bcast(&tmp_str, proc); + } else { + rabit::Bcast(&tmp_str, proc); + } + std::stringstream tmp; + tmp.str(tmp_str); + float val = 0.0f; + int j = start; + while(tmp >> val) { + model->data[j++] = val; + //utils::LogPrintf("[%d] model[%d]=%.5f\n", rank, j-1, model->data[j-1]); + } + //count + model->data[j] = 0; + } +} + +int main(int argc, char *argv[]) { + if (argc < 4) { + printf("Usage: \n"); + return 0; + } + int k = atoi(argv[1]); + int d = atoi(argv[2]); + int max_itr = atoi(argv[3]); + + rabit::Init(argc, argv); + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + std::string name = rabit::GetProcessorName(); + + srand(0); + int ntrial = 0; + Model model; + + std::vector data; + int iter = rabit::LoadCheckPoint(&model); + if (iter == 0) { + ReadData(argv[4], d, &data); + model.InitModel(k, d); + InitCentroids(k, d, data, &model); + } else { + utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); + } + for (int r = 
iter; r < max_itr; ++r) { + KMeans(ntrial, r, k, d, data, &model); + } + rabit::Finalize(); + return 0; +} diff --git a/toolkit/kmeans.sh b/toolkit/kmeans.sh new file mode 100755 index 000000000..3723a13d4 --- /dev/null +++ b/toolkit/kmeans.sh @@ -0,0 +1,8 @@ +#!/bin/bash +if [ "$#" -lt 4 ]; +then + echo "Usage " + exit -1 +fi + +../submit_job_tcp.py $1 kmeans "${@:2}" From 20b51cc9ce088dbf48a39e1fcc8f9708f4aa6824 Mon Sep 17 00:00:00 2001 From: nachocano Date: Wed, 3 Dec 2014 01:44:34 -0800 Subject: [PATCH 048/531] cleaner --- toolkit/kmeans.cpp | 49 +++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index ecd8b5a7a..3b3bacd93 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -54,7 +54,7 @@ inline void KMeans(int ntrial, int iter, int k, int d, std::vector& data, */ } -inline void ReadData(char* data_dir, int d, std::vector* data) { +inline void ReadData(char* data_dir, int d, std::vector >* data) { int rank = rabit::GetRank(); std::stringstream ss; ss << data_dir << rank; @@ -63,37 +63,37 @@ inline void ReadData(char* data_dir, int d, std::vector* data) { utils::Check(ifs.good(), "[%d] File %s does not exist\n", rank, file); float v = 0.0f; while(!ifs.eof()) { - ifs >> v; - data->push_back(v); + int i=0; + std::vector vec; + while (i < d) { + ifs >> v; + vec.push_back(v); + i++; + } + utils::Check(vec.size() % d == 0, "[%d] Invalid data size. %d instead of %d\n", rank, vec.size(), d); + data->push_back(vec); } - utils::Check(data->size() % d == 0, "[%d] Invalid data size. 
%d instead of %d\n", rank, data->size(), d); } -inline void InitCentroids(int k, int d, std::vector& data, Model* model) { +inline void InitCentroids(int k, int d, std::vector >& data, Model* model) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); - std::vector candidate_centroids(model->data.size() - k); - int elements = data.size() / d; + std::vector > candidate_centroids; + candidate_centroids.resize(k, std::vector(d)); + int elements = data.size(); for (size_t i = 0; i < k; ++i) { int index = rand() % elements; - int start = index * d; - int end = start + d; - int cstart = i * d; - //utils::LogPrintf("[%d] index=%d,start=%d\n", rank, index, start); - for (size_t j = start, l = cstart; j < end; ++j, ++l) { - candidate_centroids[l] = data[j]; - } + candidate_centroids[i] = data[index]; } for (size_t i = 0; i < k; ++i) { int proc = rand() % nproc; //utils::LogPrintf("[%d] proc=%d\n", rank, proc); std::string tmp_str; - int start = i * d; if (proc == rank) { std::ostringstream tmp; - for (size_t j = start, l = 0; l < d ; ++j, ++l) { - tmp << candidate_centroids[j]; - if (l != d-1) tmp << " "; + for (size_t j = 0; j < d ; ++j) { + tmp << candidate_centroids[i][j]; + if (j != d-1) tmp << " "; } tmp_str = tmp.str(); //utils::LogPrintf("[%d] centroid %s\n", rank, tmp_str.c_str()); @@ -101,16 +101,17 @@ inline void InitCentroids(int k, int d, std::vector& data, Model* model) } else { rabit::Bcast(&tmp_str, proc); } - std::stringstream tmp; - tmp.str(tmp_str); + std::stringstream ss; + ss.str(tmp_str); float val = 0.0f; - int j = start; - while(tmp >> val) { + int j = i * d; + while(ss >> val) { model->data[j++] = val; //utils::LogPrintf("[%d] model[%d]=%.5f\n", rank, j-1, model->data[j-1]); } //count model->data[j] = 0; + //utils::LogPrintf("[%d] model[375]=%.5f\n", rank, model->data[375]); } } @@ -132,7 +133,7 @@ int main(int argc, char *argv[]) { int ntrial = 0; Model model; - std::vector data; + std::vector > data; int iter = 
rabit::LoadCheckPoint(&model); if (iter == 0) { ReadData(argv[4], d, &data); @@ -142,7 +143,7 @@ int main(int argc, char *argv[]) { utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } for (int r = iter; r < max_itr; ++r) { - KMeans(ntrial, r, k, d, data, &model); + //KMeans(ntrial, r, k, d, data, &model); } rabit::Finalize(); return 0; From 34f2f887b1556da0ac80d7429cd2df4c4e3ac72e Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 09:59:13 -0800 Subject: [PATCH 049/531] add more broadcast and basic broadcast --- src/allreduce_base.cc | 216 +++++++++++++++++++++++++++--------------- src/allreduce_base.h | 12 +++ src/mock.h | 2 +- src/rabit-inl.h | 32 +++++-- src/rabit.h | 24 ++++- src/socket.h | 25 +++++ src/tcp_master.py | 106 --------------------- 7 files changed, 225 insertions(+), 192 deletions(-) delete mode 100644 src/tcp_master.py diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 0cfb9fd34..42c7afaab 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -19,90 +19,20 @@ AllreduceBase::AllreduceBase(void) { host_uri = ""; slave_port = 9010; nport_trial = 1000; - rank = 0; + rank = -1; world_size = 1; version_number = 0; + job_id = "NULL"; this->SetParam("reduce_buffer", "256MB"); } // initialization function void AllreduceBase::Init(void) { utils::Socket::Startup(); - // single node mode - if (master_uri == "NULL") return; utils::Assert(links.size() == 0, "can only call Init once"); - int magic = kMagic; - int nchild = 0, nparent = 0; this->host_uri = utils::SockAddr::GetHostName(); // get information from master - utils::TCPSocket master; - master.Create(); - if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) { - utils::Socket::Error("Connect"); - } - utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 1"); - utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 2"); - utils::Check(magic == kMagic, 
"sync::Invalid master message, init failure"); - utils::Assert(master.RecvAll(&rank, sizeof(rank)) == sizeof(rank), "sync::Init failure 3"); - utils::Assert(master.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), "sync::Init failure 4"); - utils::Assert(master.RecvAll(&nparent, sizeof(nparent)) == sizeof(nparent), "sync::Init failure 5"); - utils::Assert(master.RecvAll(&nchild, sizeof(nchild)) == sizeof(nchild), "sync::Init failure 6"); - utils::Assert(nchild >= 0, "in correct number of childs"); - utils::Assert(nparent == 1 || nparent == 0, "in correct number of parent"); - - // create listen - utils::TCPSocket sock_listen; - sock_listen.Create(); - int port = sock_listen.TryBindHost(slave_port, slave_port + nport_trial); - utils::Check(port != -1, "sync::Init fail to bind the ports specified"); - sock_listen.Listen(); - - if (nparent != 0) { - parent_index = 0; - links.push_back(LinkRecord()); - int len, hport; - std::string hname; - utils::Assert(master.RecvAll(&len, sizeof(len)) == sizeof(len), "sync::Init failure 9"); - hname.resize(len); - utils::Assert(len != 0, "string must not be empty"); - utils::Assert(master.RecvAll(&hname[0], len) == static_cast(len), "sync::Init failure 10"); - utils::Assert(master.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "sync::Init failure 11"); - links[0].sock.Create(); - links[0].sock.Connect(utils::SockAddr(hname.c_str(), hport)); - utils::Assert(links[0].sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 12"); - utils::Assert(links[0].sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 13"); - utils::Check(magic == kMagic, "sync::Init failure, parent magic number mismatch"); - parent_index = 0; - } else { - parent_index = -1; - } - // send back socket listening port to master - utils::Assert(master.SendAll(&port, sizeof(port)) == sizeof(port), "sync::Init failure 14"); - // close connection to master - master.Close(); - // accept links from childs - for (int i = 
0; i < nchild; ++i) { - LinkRecord r; - while (true) { - r.sock = sock_listen.Accept(); - if (r.sock.RecvAll(&magic, sizeof(magic)) == sizeof(magic) && magic == kMagic) { - utils::Assert(r.sock.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 15"); - break; - } else { - // not a valid child - r.sock.Close(); - } - } - links.push_back(r); - } - // close listening sockets - sock_listen.Close(); - // setup selecter - for (size_t i = 0; i < links.size(); ++i) { - // set the socket to non-blocking mode - links[i].sock.SetNonBlock(true); - } - // done + this->ReConnectLinks(); } void AllreduceBase::Shutdown(void) { @@ -110,6 +40,22 @@ void AllreduceBase::Shutdown(void) { links[i].sock.Close(); } links.clear(); + + if (master_uri == "NULL") return; + int magic = kMagic; + // notify master rank i have shutdown + utils::TCPSocket master; + master.Create(); + if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) { + utils::Socket::Error("Connect Master"); + } + utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 1"); + utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); + utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); + + utils::Assert(master.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); + master.SendStr(job_id); + master.SendStr(std::string("shutdown")); utils::TCPSocket::Finalize(); } /*! @@ -120,6 +66,7 @@ void AllreduceBase::Shutdown(void) { void AllreduceBase::SetParam(const char *name, const char *val) { if (!strcmp(name, "master_uri")) master_uri = val; if (!strcmp(name, "master_port")) master_port = atoi(val); + if (!strcmp(name, "job_id")) job_id = val; if (!strcmp(name, "reduce_buffer")) { char unit; unsigned long amount; @@ -136,7 +83,129 @@ void AllreduceBase::SetParam(const char *name, const char *val) { } } } +/*! 
+ * \brief connect to the master to fix the the missing links + * this function is also used when the engine start up + */ +void AllreduceBase::ReConnectLinks(void) { + // single node mode + if (master_uri == "NULL") { + rank = 0; return; + } + int magic = kMagic; + // get information from master + utils::TCPSocket master; + master.Create(); + if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) { + utils::Socket::Error("Connect"); + } + utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 1"); + utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); + utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); + utils::Assert(master.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); + master.SendStr(job_id); + master.SendStr(std::string("start")); + {// get new ranks + int newrank; + utils::Assert(master.RecvAll(&newrank, sizeof(newrank)) == sizeof(newrank), + "ReConnectLink failure 4"); + utils::Assert(master.RecvAll(&parent_rank, sizeof(parent_rank)) == sizeof(parent_rank), + "ReConnectLink failure 4"); + utils::Assert(rank == -1 || newrank == rank, "must keep rank to same if the node already have one"); + rank = newrank; + } + + // create listening socket + utils::TCPSocket sock_listen; + sock_listen.Create(); + int port = sock_listen.TryBindHost(slave_port, slave_port + nport_trial); + utils::Check(port != -1, "ReConnectLink fail to bind the ports specified"); + sock_listen.Listen(); + + // get number of to connect and number of to accept nodes from master + int num_conn, num_accept, num_error = 1; + + do { + // send over good links + std::vector good_link; + for (size_t i = 0; i < links.size(); ++i) { + if (!links[i].sock.BadSocket()) { + good_link.push_back(static_cast(links[i].rank)); + } else { + if (!links[i].sock.IsClosed()) links[i].sock.Close(); + } + } + int ngood = static_cast(good_link.size()); + 
utils::Assert(master.SendAll(&ngood, sizeof(ngood)) == sizeof(ngood), + "ReConnectLink failure 5"); + for (size_t i = 0; i < good_link.size(); ++i) { + utils::Assert(master.SendAll(&good_link[i], sizeof(good_link[i])) == sizeof(good_link[i]), + "ReConnectLink failure 6"); + } + utils::Assert(master.RecvAll(&num_conn, sizeof(num_conn)) == sizeof(num_conn), + "ReConnectLink failure 7"); + utils::Assert(master.RecvAll(&num_accept, sizeof(num_accept)) == sizeof(num_accept), + "ReConnectLink failure 8"); + num_error = 0; + for (int i = 0; i < num_conn; ++i) { + LinkRecord r; + int hport, hrank; + std::string hname; + master.RecvStr(&hname); + utils::Assert(master.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "ReConnectLink failure 9"); + utils::Assert(master.RecvAll(&hrank, sizeof(hrank)) == sizeof(hrank), "ReConnectLink failure 10"); + r.sock.Create(); + if (!r.sock.Connect(utils::SockAddr(hname.c_str(), hport))) { + num_error += 1; r.sock.Close(); continue; + } + utils::Assert(r.sock.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 12"); + utils::Assert(r.sock.RecvAll(&r.rank, sizeof(r.rank)) == sizeof(r.rank), "ReConnectLink failure 13"); + utils::Check(hrank == r.rank, "ReConnectLink failure, link rank inconsistent"); + bool match = false; + for (size_t i = 0; i < links.size(); ++i) { + if (links[i].rank == hrank) { + utils::Assert(links[i].sock.IsClosed(), "Override a link that is active"); + links[i].sock = r.sock; match = true; break; + } + } + if (!match) links.push_back(r); + } + utils::Assert(master.SendAll(&num_error, sizeof(num_error)) == sizeof(num_error), "ReConnectLink failure 14"); + } while (num_error != 0); + // send back socket listening port to master + utils::Assert(master.SendAll(&port, sizeof(port)) == sizeof(port), "ReConnectLink failure 14"); + // close connection to master + master.Close(); + // listen to incoming links + for (int i = 0; i < num_accept; ++i) { + LinkRecord r; + r.sock = sock_listen.Accept(); + 
utils::Assert(r.sock.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 15"); + utils::Assert(r.sock.RecvAll(&r.rank, sizeof(r.rank)) == sizeof(r.rank), "ReConnectLink failure 15"); + bool match = false; + for (size_t i = 0; i < links.size(); ++i) { + if (links[i].rank == r.rank) { + utils::Assert(links[i].sock.IsClosed(), "Override a link that is active"); + links[i].sock = r.sock; match = true; break; + } + } + if (!match) links.push_back(r); + } + // close listening sockets + sock_listen.Close(); + this->parent_index = -1; + // setup selecter + for (size_t i = 0; i < links.size(); ++i) { + utils::Assert(!links[i].sock.BadSocket(), "ReConnectLink: bad socket"); + // set the socket to non-blocking mode + links[i].sock.SetNonBlock(true); + if (links[i].rank == parent_rank) parent_index = static_cast(i); + } + if (parent_rank != -1) { + utils::Assert(parent_index != -1, "cannot find parent in the link"); + } +} /*! * \brief perform in-place allreduce, on sendrecvbuf, this function can fail, and will return the cause of failure * @@ -209,7 +278,6 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, finished = false; } } - } // finish runing allreduce if (finished) break; diff --git a/src/allreduce_base.h b/src/allreduce_base.h index 578b941f1..d5172f9f7 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -138,6 +138,8 @@ class AllreduceBase : public IEngine { public: // socket to get data from/to link utils::TCPSocket sock; + // rank of the node in this link + int rank; // size of data readed from link size_t size_read; // size of data sent to the link @@ -222,6 +224,11 @@ class AllreduceBase : public IEngine { // aligned with 64 bits, will be able to perform 64 bits operations freely std::vector buffer_; }; + /*! + * \brief connect to the master to fix the the missing links + * this function is also used when the engine start up + */ + void ReConnectLinks(void); /*! 
* \brief perform in-place allreduce, on sendrecvbuf, this function can fail, and will return the cause of failure * @@ -255,9 +262,14 @@ class AllreduceBase : public IEngine { //---- local data related to link ---- // index of parent link, can be -1, meaning this is root of the tree int parent_index; + // rank of parent node, can be -1 + int parent_rank; // sockets of all links std::vector links; //----- meta information----- + // unique identifier of the possible job this process is doing + // used to assign ranks, optional, default to NULL + std::string job_id; // uri of current host, to be set by Init std::string host_uri; // uri of master diff --git a/src/mock.h b/src/mock.h index 5c85b841f..31c93d113 100644 --- a/src/mock.h +++ b/src/mock.h @@ -42,7 +42,7 @@ public: inline void Broadcast(std::string *sendrecv_data, int root) { utils::Assert(verify(broadcast), "[%d] error when broadcasting", rank); - rabit::Bcast(sendrecv_data, root); + rabit::Broadcast(sendrecv_data, root); } diff --git a/src/rabit-inl.h b/src/rabit-inl.h index 4ea741efe..c38766bbb 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -91,16 +91,32 @@ inline int GetWorldSize(void) { inline std::string GetProcessorName(void) { return engine::GetEngine()->GetHost(); } -// broadcast an std::string to all others from root -inline void Bcast(std::string *sendrecv_data, int root) { - engine::IEngine *e = engine::GetEngine(); - unsigned len = static_cast(sendrecv_data->length()); - e->Broadcast(&len, sizeof(len), root); - sendrecv_data->resize(len); - if (len != 0) { - e->Broadcast(&(*sendrecv_data)[0], len, root); +// broadcast data to all other nodes from root +inline void Broadcast(void *sendrecv_data, size_t size, int root) { + engine::GetEngine()->Broadcast(sendrecv_data, size, root); +} +template +inline void Broadcast(std::vector *sendrecv_data, int root) { + size_t size = sendrecv_data->size(); + Broadcast(&size, sizeof(size), root); + if (sendrecv_data->size() != size) { + 
sendrecv_data->resize(size); + } + if (size != 0) { + Broadcast(&sendrecv_data[0], size * sizeof(DType), root); } } +inline void Broadcast(std::string *sendrecv_data, int root) { + size_t size = sendrecv_data->length(); + Broadcast(&size, sizeof(size), root); + if (sendrecv_data->length() != size) { + sendrecv_data->resize(size); + } + if (size != 0) { + Broadcast(&sendrecv_data[0], size * sizeof(char), root); + } +} + // perform inplace Allreduce template inline void Allreduce(DType *sendrecvbuf, size_t count) { diff --git a/src/rabit.h b/src/rabit.h index 859b5488a..99dd0b4d9 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -8,6 +8,8 @@ * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ +#include +#include #include "./engine.h" /*! \brief namespace of rabit */ @@ -43,11 +45,27 @@ inline std::string GetProcessorName(void); /*! * \brief broadcast an std::string to all others from root * \param sendrecv_data the pointer to send or recive buffer, - * receive buffer does not need to be pre-allocated - * and string will be resized to correct length + * \param size the size of the data * \param root the root of process */ -inline void Bcast(std::string *sendrecv_data, int root); +inline void Broadcast(void *sendrecv_data, size_t size, int root); +/*! + * \brief broadcast an std::vector to all others from root + * \param sendrecv_data the pointer to send or recive vector, + * for receiver, the vector does not need to be pre-allocated + * \param root the root of process + * \tparam DType the data type stored in vector, have to be simple data type + * that can be directly send by sending the sizeof(DType) data + */ +template +inline void Broadcast(std::vector *sendrecv_data, int root); +/*! 
+ * \brief broadcast an std::string to all others from root + * \param sendrecv_data the pointer to send or recive vector, + * for receiver, the vector does not need to be pre-allocated + * \param root the root of process + */ +inline void Broadcast(std::string *sendrecv_data, int root); /*! * \brief perform in-place allreduce, on sendrecvbuf * this function is NOT thread-safe diff --git a/src/socket.h b/src/socket.h index 296b8aeea..3386b7d1d 100644 --- a/src/socket.h +++ b/src/socket.h @@ -331,6 +331,31 @@ class TCPSocket : public Socket{ } return ndone; } + /*! + * \brief send a string over network + * \param str the string to be sent + */ + inline void SendStr(const std::string &str) { + unsigned len = static_cast(str.length()); + utils::Assert(this->SendAll(&len, sizeof(len)) == sizeof(len), + "error during send SendStr"); + utils::Assert(this->SendAll(str.c_str(), str.length()) == str.length(), + "error during send SendStr"); + } + /*! + * \brief recv a string from network + * \param out_str the string to receive + */ + inline void RecvStr(std::string *out_str) { + unsigned len; + utils::Assert(this->RecvAll(&len, sizeof(len)) == sizeof(len), + "error during send RecvStr"); + out_str->resize(len); + if (len != 0) { + utils::Assert(this->RecvAll(&(*out_str)[0], len) == len, + "error during send SendStr"); + } + } }; /*! 
\brief helper data structure to perform select */ diff --git a/src/tcp_master.py b/src/tcp_master.py deleted file mode 100644 index 015b48784..000000000 --- a/src/tcp_master.py +++ /dev/null @@ -1,106 +0,0 @@ -""" -Master script for xgboost, tcp_master -This script can be used to start jobs of multi-node xgboost using sync_tcp - -Tianqi Chen -""" - -import sys -import os -import socket -import struct -import subprocess -from threading import Thread - -class ExSocket: - def __init__(self, sock): - self.sock = sock - def recvall(self, nbytes): - res = [] - sock = self.sock - nread = 0 - while nread < nbytes: - chunk = self.sock.recv(min(nbytes - nread, 1024), socket.MSG_WAITALL) - nread += len(chunk) - res.append(chunk) - return ''.join(res) - def recvint(self): - return struct.unpack('@i', self.recvall(4))[0] - def sendint(self, n): - self.sock.sendall(struct.pack('@i', n)) - def sendstr(self, s): - self.sendint(len(s)) - self.sock.sendall(s) - -# magic number used to verify existence of data -kMagic = 0xff99 - -class Master: - def __init__(self, port = 9000, port_end = 9999): - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - for port in range(port, port_end): - try: - sock.bind(('', port)) - self.port = port - break - except socket.error: - continue - sock.listen(16) - self.sock = sock - print 'start listen on %s:%d' % (socket.gethostname(), self.port) - def __del__(self): - self.sock.close() - def slave_args(self): - return ['master_uri=%s' % socket.gethostname(), - 'master_port=%s' % self.port] - def accept_slaves(self, nslave): - slave_addrs = [] - for rank in range(nslave): - while True: - fd, s_addr = self.sock.accept() - slave = ExSocket(fd) - nparent = int(rank != 0) - nchild = 0 - if (rank + 1) * 2 - 1 < nslave: - nchild += 1 - if (rank + 1) * 2 < nslave: - nchild += 1 - try: - magic = slave.recvint() - if magic != kMagic: - print 'invalid magic number=%d from %s' % (magic, s_addr[0]) - slave.sock.close() - continue - except socket.error: - print 
'sock error in %s' % (s_addr[0]) - slave.sock.close() - continue - slave.sendint(kMagic) - slave.sendint(rank) - slave.sendint(nslave) - slave.sendint(nparent) - slave.sendint(nchild) - if nparent != 0: - parent_index = (rank + 1) / 2 - 1 - ptuple = slave_addrs[parent_index] - slave.sendstr(ptuple[0]) - slave.sendint(ptuple[1]) - s_port = slave.recvint() - assert rank == len(slave_addrs) - slave_addrs.append((s_addr[0], s_port)) - slave.sock.close() - print 'finish starting rank=%d at %s' % (rank, s_addr[0]) - break - print 'all slaves setup complete' - -def mpi_submit(nslave, args): - cmd = ' '.join(['mpirun -n %d' % nslave] + args) - print cmd - return subprocess.check_call(cmd, shell = True) - -def submit(nslave, args, fun_submit = mpi_submit): - master = Master() - submit_thread = Thread(target = fun_submit, args = (nslave, args + master.slave_args())) - submit_thread.start() - master.accept_slaves(nslave) - submit_thread.join() From f3e5b6e13c658d2281e3e153c5e65a306ccc7387 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 10:00:47 -0800 Subject: [PATCH 050/531] ok --- src/rabit.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rabit.h b/src/rabit.h index 99dd0b4d9..68e39f3fa 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -43,7 +43,8 @@ inline int GetWorldSize(void); /*! \brief get name of processor */ inline std::string GetProcessorName(void); /*! 
- * \brief broadcast an std::string to all others from root + * \brief broadcast an memory region to all others from root + * Example: int a = 1; Broadcast(&a, sizeof(a), root); * \param sendrecv_data the pointer to send or recive buffer, * \param size the size of the data * \param root the root of process From ceeb6f06906ebf90a71a8f2c6de84f2882413651 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 11:17:39 -0800 Subject: [PATCH 051/531] bug version, check in and rollback --- src/allreduce_base.cc | 1 + src/rabit_master.py | 175 +++++++++++++++++++++++++++++ submit_job_tcp.py => submit_job.py | 4 +- 3 files changed, 178 insertions(+), 2 deletions(-) create mode 100644 src/rabit_master.py rename submit_job_tcp.py => submit_job.py (95%) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 42c7afaab..6ef941425 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -202,6 +202,7 @@ void AllreduceBase::ReConnectLinks(void) { links[i].sock.SetNonBlock(true); if (links[i].rank == parent_rank) parent_index = static_cast(i); } + utils::LogPrintf("[%d] parent_rank=%d, parent_index=%d, nlink=%d\n", rank, parent_rank, parent_index, (int)links.size()); if (parent_rank != -1) { utils::Assert(parent_index != -1, "cannot find parent in the link"); } diff --git a/src/rabit_master.py b/src/rabit_master.py new file mode 100644 index 000000000..85b981972 --- /dev/null +++ b/src/rabit_master.py @@ -0,0 +1,175 @@ +""" +Master script for rabit +Implements the master control protocol to start rabit jobs and assign necessary information + +Tianqi Chen +""" + +import sys +import os +import socket +import struct +import subprocess +from threading import Thread + +""" +Extension of socket to handle recv and send of special data +""" +class ExSocket: + def __init__(self, sock): + self.sock = sock + def recvall(self, nbytes): + res = [] + sock = self.sock + nread = 0 + while nread < nbytes: + chunk = self.sock.recv(min(nbytes - nread, 1024), 
socket.MSG_WAITALL) + nread += len(chunk) + res.append(chunk) + return ''.join(res) + def recvint(self): + return struct.unpack('@i', self.recvall(4))[0] + def sendint(self, n): + self.sock.sendall(struct.pack('@i', n)) + def sendstr(self, s): + self.sendint(len(s)) + self.sock.sendall(s) + def recvstr(self): + slen = self.recvint() + return self.recvall(slen) + +# magic number used to verify existence of data +kMagic = 0xff99 + +class SlaveEntry: + def __init__(self, sock, s_addr): + slave = ExSocket(sock) + self.sock = slave + self.host = s_addr[0] + magic = slave.recvint() + assert magic == kMagic, 'invalid magic number=%d from %s' % (magic, s_addr[0]) + slave.sendint(kMagic) + self.rank = slave.recvint() + self.jobid = slave.recvstr() + self.cmd = slave.recvstr() + + def decide_rank(self, job_map): + if self.rank >= 0: + return self.rank + if self.jobid != 'NULL' and self.jobid in job_map: + job_map[self.jobid] + return -1 + + def get_neighbor(self, rank, nslave): + rank = rank + 1 + ret = [] + if rank > 1: + ret.append(rank / 2 - 1) + if rank * 2 - 1 < nslave: + ret.append(rank * 2 - 1) + if rank * 2 < nslave: + ret.append(rank * 2) + return set(ret) + + def assign_rank(self, rank, wait_conn, nslave): + self.rank = rank + nnset = self.get_neighbor(rank, nslave) + self.sock.sendint(rank) + # send parent rank + self.sock.sendint((rank + 1) / 2 - 1) + while True: + ngood = self.sock.recvint() + goodset = set([]) + for i in xrange(ngood): + goodset.add(self.sock.recvint()) + assert goodset.issubset(nnset) + badset = nnset - goodset + conset = [] + for r in badset: + if r in wait_conn: + conset.append(r) + print 'rank=%d' % rank + print 'conset=%s' % str(conset) + self.sock.sendint(len(conset)) + self.sock.sendint(len(badset) - len(conset)) + for r in conset: + self.sock.sendstr(wait_conn[r].host) + self.sock.sendint(wait_conn[r].port) + self.sock.sendint(r) + nerr = self.sock.recvint() + if nerr != 0: + continue + self.port = self.sock.recvint() + rmset = [] + # 
all connection was successuly setup + for r in conset: + wait_conn[r].wait_accept -= 1 + if wait_conn[r].wait_accept == 0: + rmset.append(r) + for r in rmset: + wait_conn.pop(r, None) + self.wait_accept = len(badset) - len(conset) + print 'wait=%d' % self.wait_accept + return rmset + +class Master: + def __init__(self, port = 9000, port_end = 9999): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + for port in range(port, port_end): + try: + sock.bind(('', port)) + self.port = port + break + except socket.error: + continue + sock.listen(16) + self.sock = sock + print 'start listen on %s:%d' % (socket.gethostname(), self.port) + def __del__(self): + self.sock.close() + def slave_args(self): + return ['master_uri=%s' % socket.gethostname(), + 'master_port=%s' % self.port] + def accept_slaves(self, nslave): + # set of nodes that finishs the job + shutdown = {} + # set of nodes that is waiting for connections + wait_conn = {} + # set of nodes that is pending for getting up + todo_nodes = range(nslave) + # maps job id to rank + job_map = {} + # list of workers that is pending to be assigned rank + pending = [] + + while len(shutdown) != nslave: + fd, s_addr = self.sock.accept() + s = SlaveEntry(fd, s_addr) + if s.cmd == 'shutdown': + assert s.rank >= 0 and s.rank not in shutdown + assert s.rank not in wait_conn + shutdown[s.rank] = s + continue + assert s.cmd == 'start' + rank = s.decide_rank(job_map) + if rank == -1: + assert len(todo_nodes) != 0 + rank = todo_nodes.pop(0) + if s.jobid != 'NULL': + job_map[s.jobid] = rank + s.assign_rank(rank, wait_conn, nslave) + if s.wait_accept > 0: + wait_conn[rank] = s + print 'all slaves setup complete' + +def mpi_submit(nslave, args): + cmd = ' '.join(['mpirun -n %d' % nslave] + args) + print cmd + return subprocess.check_call(cmd, shell = True) + +def submit(nslave, args, fun_submit = mpi_submit): + master = Master() + submit_thread = Thread(target = fun_submit, args = (nslave, args + master.slave_args())) + 
submit_thread.start() + master.accept_slaves(nslave) + submit_thread.join() diff --git a/submit_job_tcp.py b/submit_job.py similarity index 95% rename from submit_job_tcp.py rename to submit_job.py index d79ef53bf..317ff6f43 100755 --- a/submit_job_tcp.py +++ b/submit_job.py @@ -1,7 +1,7 @@ #!/usr/bin/python """ This is an example script to create a customized job submit -script using xgboost sync_tcp mode +script using rabit engine """ import sys import os @@ -9,7 +9,7 @@ import subprocess # import the tcp_master.py # add path to sync sys.path.append(os.path.dirname(__file__)+'/src/') -import tcp_master as master +import rabit_master as master # # Note: this submit script is only used for example purpose From a186f8c3aaa10a088c9ac862b03da3adba159950 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 11:19:43 -0800 Subject: [PATCH 052/531] ok --- src/socket.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/socket.h b/src/socket.h index 3386b7d1d..eba1b89f8 100644 --- a/src/socket.h +++ b/src/socket.h @@ -336,23 +336,25 @@ class TCPSocket : public Socket{ * \param str the string to be sent */ inline void SendStr(const std::string &str) { - unsigned len = static_cast(str.length()); + int len = static_cast(str.length()); utils::Assert(this->SendAll(&len, sizeof(len)) == sizeof(len), "error during send SendStr"); - utils::Assert(this->SendAll(str.c_str(), str.length()) == str.length(), - "error during send SendStr"); + if (len != 0) { + utils::Assert(this->SendAll(str.c_str(), str.length()) == str.length(), + "error during send SendStr"); + } } /*! 
* \brief recv a string from network * \param out_str the string to receive */ inline void RecvStr(std::string *out_str) { - unsigned len; + int len; utils::Assert(this->RecvAll(&len, sizeof(len)) == sizeof(len), "error during send RecvStr"); out_str->resize(len); if (len != 0) { - utils::Assert(this->RecvAll(&(*out_str)[0], len) == len, + utils::Assert(this->RecvAll(&(*out_str)[0], len) == out_str->length(), "error during send SendStr"); } } From 8a6768763dee8a14ea521005a2005d5fa15cf3c4 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 11:51:39 -0800 Subject: [PATCH 053/531] bug fixed ver --- src/allreduce_base.cc | 13 ++++++------- src/allreduce_base.h | 2 +- src/rabit-inl.h | 4 ++-- src/rabit_master.py | 5 ++--- test/test_allreduce.cpp | 2 +- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 6ef941425..f90e5d621 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -55,7 +55,8 @@ void AllreduceBase::Shutdown(void) { utils::Assert(master.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); master.SendStr(job_id); - master.SendStr(std::string("shutdown")); + master.SendStr(std::string("shutdown")); + master.Close(); utils::TCPSocket::Finalize(); } /*! 
@@ -102,7 +103,6 @@ void AllreduceBase::ReConnectLinks(void) { utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 1"); utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); - utils::Assert(master.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); master.SendStr(job_id); master.SendStr(std::string("start")); @@ -112,10 +112,11 @@ void AllreduceBase::ReConnectLinks(void) { "ReConnectLink failure 4"); utils::Assert(master.RecvAll(&parent_rank, sizeof(parent_rank)) == sizeof(parent_rank), "ReConnectLink failure 4"); + utils::Assert(master.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), + "ReConnectLink failure 4"); utils::Assert(rank == -1 || newrank == rank, "must keep rank to same if the node already have one"); rank = newrank; - } - + } // create listening socket utils::TCPSocket sock_listen; sock_listen.Create(); @@ -125,7 +126,6 @@ void AllreduceBase::ReConnectLinks(void) { // get number of to connect and number of to accept nodes from master int num_conn, num_accept, num_error = 1; - do { // send over good links std::vector good_link; @@ -146,7 +146,7 @@ void AllreduceBase::ReConnectLinks(void) { utils::Assert(master.RecvAll(&num_conn, sizeof(num_conn)) == sizeof(num_conn), "ReConnectLink failure 7"); utils::Assert(master.RecvAll(&num_accept, sizeof(num_accept)) == sizeof(num_accept), - "ReConnectLink failure 8"); + "ReConnectLink failure 8"); num_error = 0; for (int i = 0; i < num_conn; ++i) { LinkRecord r; @@ -202,7 +202,6 @@ void AllreduceBase::ReConnectLinks(void) { links[i].sock.SetNonBlock(true); if (links[i].rank == parent_rank) parent_index = static_cast(i); } - utils::LogPrintf("[%d] parent_rank=%d, parent_index=%d, nlink=%d\n", rank, parent_rank, parent_index, (int)links.size()); if (parent_rank != -1) { utils::Assert(parent_index != -1, "cannot find 
parent in the link"); } diff --git a/src/allreduce_base.h b/src/allreduce_base.h index d5172f9f7..436916cda 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -80,7 +80,7 @@ class AllreduceBase : public IEngine { */ virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { utils::Assert(TryBroadcast(sendrecvbuf_, total_size, root) == kSuccess, - "Allreduce failed"); + "Broadcast failed"); } /*! * \brief load latest check point diff --git a/src/rabit-inl.h b/src/rabit-inl.h index c38766bbb..631686582 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -103,7 +103,7 @@ inline void Broadcast(std::vector *sendrecv_data, int root) { sendrecv_data->resize(size); } if (size != 0) { - Broadcast(&sendrecv_data[0], size * sizeof(DType), root); + Broadcast(&(*sendrecv_data)[0], size * sizeof(DType), root); } } inline void Broadcast(std::string *sendrecv_data, int root) { @@ -113,7 +113,7 @@ inline void Broadcast(std::string *sendrecv_data, int root) { sendrecv_data->resize(size); } if (size != 0) { - Broadcast(&sendrecv_data[0], size * sizeof(char), root); + Broadcast(&(*sendrecv_data)[0], size * sizeof(char), root); } } diff --git a/src/rabit_master.py b/src/rabit_master.py index 85b981972..1cfc00dc0 100644 --- a/src/rabit_master.py +++ b/src/rabit_master.py @@ -77,6 +77,8 @@ class SlaveEntry: self.sock.sendint(rank) # send parent rank self.sock.sendint((rank + 1) / 2 - 1) + # send world size + self.sock.sendint(nslave) while True: ngood = self.sock.recvint() goodset = set([]) @@ -88,8 +90,6 @@ class SlaveEntry: for r in badset: if r in wait_conn: conset.append(r) - print 'rank=%d' % rank - print 'conset=%s' % str(conset) self.sock.sendint(len(conset)) self.sock.sendint(len(badset) - len(conset)) for r in conset: @@ -109,7 +109,6 @@ class SlaveEntry: for r in rmset: wait_conn.pop(r, None) self.wait_accept = len(badset) - len(conset) - print 'wait=%d' % self.wait_accept return rmset class Master: diff --git a/test/test_allreduce.cpp 
b/test/test_allreduce.cpp index 625d9592a..707b1a22a 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -80,7 +80,7 @@ int main(int argc, char *argv[]) { TestSum(mock, n); utils::LogPrintf("[%d] !!!TestSum pass\n", rank); int step = std::max(nproc / 3, 1); - for (int i = 0; i < nproc; i += step) { + for (int i = 0; i < nproc; i += step) { TestBcast(mock, n, i); } utils::LogPrintf("[%d] !!!TestBcast pass\n", rank); From 252328850971a70792c9cae4c4123efa9da91c39 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 12:19:08 -0800 Subject: [PATCH 054/531] basic recovery works --- src/allreduce_base.cc | 4 ++-- src/allreduce_base.h | 3 ++- src/allreduce_robust.cc | 18 +++++++++--------- src/allreduce_robust.h | 9 ++------- src/rabit-inl.h | 2 +- src/rabit_master.py | 7 ++++++- 6 files changed, 22 insertions(+), 21 deletions(-) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index f90e5d621..4c30b62d2 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -88,7 +88,7 @@ void AllreduceBase::SetParam(const char *name, const char *val) { * \brief connect to the master to fix the the missing links * this function is also used when the engine start up */ -void AllreduceBase::ReConnectLinks(void) { +void AllreduceBase::ReConnectLinks(const char *cmd) { // single node mode if (master_uri == "NULL") { rank = 0; return; @@ -105,7 +105,7 @@ void AllreduceBase::ReConnectLinks(void) { utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); utils::Assert(master.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); master.SendStr(job_id); - master.SendStr(std::string("start")); + master.SendStr(std::string(cmd)); {// get new ranks int newrank; utils::Assert(master.RecvAll(&newrank, sizeof(newrank)) == sizeof(newrank), diff --git a/src/allreduce_base.h b/src/allreduce_base.h index 436916cda..cd9a5b0d0 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -227,8 +227,9 @@ class 
AllreduceBase : public IEngine { /*! * \brief connect to the master to fix the the missing links * this function is also used when the engine start up + * \param cmd possible command to sent to master */ - void ReConnectLinks(void); + void ReConnectLinks(const char *cmd = "start"); /*! * \brief perform in-place allreduce, on sendrecvbuf, this function can fail, and will return the cause of failure * diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index d2339a3be..6aba63e82 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -281,14 +281,6 @@ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { } return kSuccess; } -/*! - * \brief try to reconnect the broken links - * \return this function can kSuccess or kSockError - */ -AllreduceRobust::ReturnType AllreduceRobust::TryReConnectLinks(void) { - utils::Error("TryReConnectLinks: not implemented"); - return kSuccess; -} /*! * \brief if err_type indicates an error * recover links according to the error type reported @@ -298,12 +290,20 @@ AllreduceRobust::ReturnType AllreduceRobust::TryReConnectLinks(void) { */ bool AllreduceRobust::CheckAndRecover(ReturnType err_type) { if (err_type == kSuccess) return true; + // simple way, shutdown all links + for (size_t i = 0; i < links.size(); ++i) { + if (!links[i].sock.BadSocket()) links[i].sock.Close(); + } + ReConnectLinks("recover"); + return false; + // this was old way while(err_type != kSuccess) { switch(err_type) { case kGetExcept: err_type = TryResetLinks(); break; case kSockError: { TryResetLinks(); - err_type = TryReConnectLinks(); + ReConnectLinks(); + err_type = kSuccess; break; } default: utils::Assert(false, "RecoverLinks: cannot reach here"); diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index 26e45f16c..ad660da94 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -70,7 +70,7 @@ class AllreduceRobust : public AllreduceBase { * this function is only used for test purpose */ virtual void 
InitAfterException(void) { - //this->CheckAndRecover(kGetExcept); + this->CheckAndRecover(kGetExcept); } private: @@ -234,12 +234,7 @@ class AllreduceRobust : public AllreduceBase { * when kSockError is returned, it simply means there are bad sockets in the links, * and some link recovery proceduer is needed */ - ReturnType TryResetLinks(void); - /*! - * \brief try to reconnect the broken links - * \return this function can kSuccess or kSockError - */ - ReturnType TryReConnectLinks(void); + ReturnType TryResetLinks(void); /*! * \brief if err_type indicates an error * recover links according to the error type reported diff --git a/src/rabit-inl.h b/src/rabit-inl.h index 631686582..f3fd39b2a 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -1,7 +1,7 @@ /*! * \file rabit-inl.h * \brief implementation of inline template function for rabit interface - * + * * \author Tianqi Chen */ #ifndef RABIT_RABIT_INL_H diff --git a/src/rabit_master.py b/src/rabit_master.py index 1cfc00dc0..cfa1cce9a 100644 --- a/src/rabit_master.py +++ b/src/rabit_master.py @@ -10,6 +10,7 @@ import os import socket import struct import subprocess +import random from threading import Thread """ @@ -136,6 +137,7 @@ class Master: wait_conn = {} # set of nodes that is pending for getting up todo_nodes = range(nslave) + random.shuffle(todo_nodes) # maps job id to rank job_map = {} # list of workers that is pending to be assigned rank @@ -149,7 +151,10 @@ class Master: assert s.rank not in wait_conn shutdown[s.rank] = s continue - assert s.cmd == 'start' + assert s.cmd == 'start' or s.cmd == 'recover' + if s.cmd == 'recover': + assert s.rank >= 0 + print 'Recieve recover signal from %d' % s.rank rank = s.decide_rank(job_map) if rank == -1: assert len(todo_nodes) != 0 From 7a983a4079632d1200c0de45c6237f3f9a463a3f Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 13:21:30 -0800 Subject: [PATCH 055/531] add keepalive --- src/rabit_master.py | 2 +- test/keepalive.sh | 12 ++++++++++++ 
test/test_model_recover.cpp | 4 ++-- 3 files changed, 15 insertions(+), 3 deletions(-) create mode 100755 test/keepalive.sh diff --git a/src/rabit_master.py b/src/rabit_master.py index cfa1cce9a..dbe303e39 100644 --- a/src/rabit_master.py +++ b/src/rabit_master.py @@ -58,7 +58,7 @@ class SlaveEntry: if self.rank >= 0: return self.rank if self.jobid != 'NULL' and self.jobid in job_map: - job_map[self.jobid] + return job_map[self.jobid] return -1 def get_neighbor(self, rank, nslave): diff --git a/test/keepalive.sh b/test/keepalive.sh new file mode 100755 index 000000000..99bbb83f5 --- /dev/null +++ b/test/keepalive.sh @@ -0,0 +1,12 @@ +#!/bin/bash +if [ "$#" -lt 1 ]; +then + echo "Usage: program parameters" + echo "Repeatively run program until success" + exit -1 +fi +echo ./$@ job_id=$OMPI_COMM_WORLD_RANK +until ./$@ job_id=$OMPI_COMM_WORLD_RANK; do + echo "Server "$1" crashed with exit code $?. Respawning.." >&2 + sleep 1 +done diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index c482c266c..0d9f1bce7 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -41,7 +41,7 @@ inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { } mock.Allreduce(&ndata[0], ndata.size()); if (ntrial == iter && rank == 3) { - throw MockException(); + exit(-1); } for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; @@ -65,7 +65,7 @@ inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { mock.Allreduce(&ndata[0], ndata.size()); if (ntrial == iter && rank == 0) { - throw MockException(); + exit(-1); } for (size_t i = 0; i < ndata.size(); ++i) { From 1d0d5bb14164d81a51bac83b041cfdeb6caf560f Mon Sep 17 00:00:00 2001 From: nachocano Date: Wed, 3 Dec 2014 14:21:10 -0800 Subject: [PATCH 056/531] kmeans seems to be working.. 
not restarting anything though --- src/utils.h | 10 +++++ toolkit/kmeans.cpp | 100 ++++++++++++++++++++++++++++----------------- toolkit/kmeans.sh | 2 +- 3 files changed, 74 insertions(+), 38 deletions(-) diff --git a/src/utils.h b/src/utils.h index d09667d89..ec4458bb3 100644 --- a/src/utils.h +++ b/src/utils.h @@ -161,6 +161,16 @@ inline void Error(const char *fmt, ...) { } #endif +/*!\brief computes the dot product between two dense vectors */ +inline float DotProduct(const std::vector& v1, const std::vector& v2) { + utils::Assert(v1.size() == v2.size(), "Arrays have different sizes"); + float result = 0.0f; + for (int i = 0; i < v1.size(); ++i) { + result += v1[i] * v2[i]; + } + return result; +} + /*! \brief replace fopen, report error when the file open fails */ inline std::FILE *FopenCheck(const char *fname, const char *flag) { std::FILE *fp = fopen64(fname, flag); diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index 3b3bacd93..20a941e9e 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -8,6 +8,7 @@ #include #include #include +#include using namespace rabit; @@ -28,30 +29,65 @@ class Model : public rabit::utils::ISerializable { }; -inline void KMeans(int ntrial, int iter, int k, int d, std::vector& data, Model *model) { +inline void KMeans(int ntrial, int iter, int k, int d, std::vector >& data, Model *model) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); -/* const int z = iter + 111; + utils::LogPrintf("[%d] Running KMeans iter=%d\n", rank, iter); - std::vector ndata(model->data.size()); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % z + model->data[i]; - } - rabit::Allreduce(&ndata[0], ndata.size()); - if (ntrial == iter && rank == 3) { - //throw MockException(); - } - for (size_t i = 0; i < ndata.size(); ++i) { - float rmax = (i * 1) % z + model->data[i]; - for (int r = 0; r < nproc; ++r) { - rmax = std::max(rmax, (float)((i * (r+1)) % z) + model->data[i]); + // compute centroids + 
std::vector > centroids; + centroids.resize(k, std::vector(d)); + for (int i = 0; i < k; ++i) { + std::vector centroid(d); + int start = i * d + i; + int count = model->data[start + d]; + //utils::LogPrintf("[%d] count=%d\n", rank, count); + for (int j = start, l = 0; l < d; ++j, ++l) { + centroid[l] = model->data[j] / count; } - utils::Check(rmax == ndata[i], "[%d] TestMax check failure\n", rank); + centroids[i] = centroid; } + + // compute assignments + int size = data.size(); + std::vector assignments(size, -1); + for (int i = 0; i < size; ++i) { + float max_sim = FLT_MIN; + for (int j = 0; j < k; ++j) { + float sim = utils::DotProduct(data[i], centroids[j]); + if (sim > max_sim) { + assignments[i] = j; + max_sim = sim; + } + } + } + + // add values and increment counts + std::vector ndata(k * d + k, 0.0f); + for (int i=0; i < size; i++) { + int index = assignments[i]; + int start = index * d + index; + int j = start; + for (int l = 0; l < d; ++j, ++l) { + ndata[j] += data[i][l]; + } + ndata[j] += 1; + } + + // reduce + rabit::Allreduce(&ndata[0], ndata.size()); model->data = ndata; -*/ + /* + if (rank == 0) { + int counts = 0; + for (int i = 0; i < k; ++i) { + counts += model->data[i * d + i + d]; + } + utils::LogPrintf("[%d] counts=%d\n", rank, counts); + } + */ } inline void ReadData(char* data_dir, int d, std::vector >* data) { @@ -88,30 +124,19 @@ inline void InitCentroids(int k, int d, std::vector >& data, for (size_t i = 0; i < k; ++i) { int proc = rand() % nproc; //utils::LogPrintf("[%d] proc=%d\n", rank, proc); - std::string tmp_str; + std::vector tmp(d, 0.0f); if (proc == rank) { - std::ostringstream tmp; - for (size_t j = 0; j < d ; ++j) { - tmp << candidate_centroids[i][j]; - if (j != d-1) tmp << " "; - } - tmp_str = tmp.str(); - //utils::LogPrintf("[%d] centroid %s\n", rank, tmp_str.c_str()); - rabit::Bcast(&tmp_str, proc); + tmp = candidate_centroids[i]; + rabit::Broadcast(&tmp, proc); } else { - rabit::Bcast(&tmp_str, proc); + 
rabit::Broadcast(&tmp, proc); } - std::stringstream ss; - ss.str(tmp_str); - float val = 0.0f; - int j = i * d; - while(ss >> val) { - model->data[j++] = val; - //utils::LogPrintf("[%d] model[%d]=%.5f\n", rank, j-1, model->data[j-1]); + int start = i * d + i; + int j = start; + for (int l = 0; l < d; ++j, ++l) { + model->data[j] = tmp[l]; } - //count - model->data[j] = 0; - //utils::LogPrintf("[%d] model[375]=%.5f\n", rank, model->data[375]); + model->data[j] = 1; } } @@ -143,7 +168,8 @@ int main(int argc, char *argv[]) { utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } for (int r = iter; r < max_itr; ++r) { - //KMeans(ntrial, r, k, d, data, &model); + KMeans(ntrial, r, k, d, data, &model); + rabit::CheckPoint(model); } rabit::Finalize(); return 0; diff --git a/toolkit/kmeans.sh b/toolkit/kmeans.sh index 3723a13d4..53235a318 100755 --- a/toolkit/kmeans.sh +++ b/toolkit/kmeans.sh @@ -5,4 +5,4 @@ then exit -1 fi -../submit_job_tcp.py $1 kmeans "${@:2}" +../submit_job.py $1 kmeans "${@:2}" From 90b9f1a98a2056a235ec6d792b97320f2c07514c Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 15:04:30 -0800 Subject: [PATCH 057/531] add keepalive script --- test/keepalive.sh | 6 ++++-- test/test_model_recover.cpp | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/test/keepalive.sh b/test/keepalive.sh index 99bbb83f5..e72a2bba9 100755 --- a/test/keepalive.sh +++ b/test/keepalive.sh @@ -5,8 +5,10 @@ then echo "Repeatively run program until success" exit -1 fi +nrep=0 echo ./$@ job_id=$OMPI_COMM_WORLD_RANK -until ./$@ job_id=$OMPI_COMM_WORLD_RANK; do - echo "Server "$1" crashed with exit code $?. Respawning.." 
>&2 +until ./$@ job_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do sleep 1 + nrep=$((nrep+1)) + echo ./$@ job_id=$OMPI_COMM_WORLD_RANK repeat=$nrep done diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index 0d9f1bce7..86762c671 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -109,11 +109,16 @@ int main(int argc, char *argv[]) { Model model; srand(0); int ntrial = 0; + for (int i = 1; i < argc; ++i) { + int n; + if (sscanf(argv[i], "repeat=%d", &n) == 1) ntrial = n; + } while (true) { try { int iter = rabit::LoadCheckPoint(&model); if (iter == 0) { model.InitModel(n); + utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } else { utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } From 5c23b94069d29b7a083ac52c09a1074652d075fc Mon Sep 17 00:00:00 2001 From: nachocano Date: Wed, 3 Dec 2014 15:38:58 -0800 Subject: [PATCH 058/531] updating kmeans based on Tianqi feedback. More efficient now --- src/utils.h | 10 ------ toolkit/kmeans.cpp | 79 +++++++++++++++++++--------------------------- 2 files changed, 33 insertions(+), 56 deletions(-) diff --git a/src/utils.h b/src/utils.h index ec4458bb3..d09667d89 100644 --- a/src/utils.h +++ b/src/utils.h @@ -161,16 +161,6 @@ inline void Error(const char *fmt, ...) { } #endif -/*!\brief computes the dot product between two dense vectors */ -inline float DotProduct(const std::vector& v1, const std::vector& v2) { - utils::Assert(v1.size() == v2.size(), "Arrays have different sizes"); - float result = 0.0f; - for (int i = 0; i < v1.size(); ++i) { - result += v1[i] * v2[i]; - } - return result; -} - /*! 
\brief replace fopen, report error when the file open fails */ inline std::FILE *FopenCheck(const char *fname, const char *flag) { std::FILE *fp = fopen64(fname, flag); diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index 20a941e9e..dff12b77c 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -14,80 +14,69 @@ using namespace rabit; class Model : public rabit::utils::ISerializable { public: - std::vector data; + std::vector centroids; // load from stream virtual void Load(rabit::utils::IStream &fi) { - fi.Read(&data); + fi.Read(¢roids); } /*! \brief save the model to the stream */ virtual void Save(rabit::utils::IStream &fo) const { - fo.Write(data); + fo.Write(centroids); } virtual void InitModel(int k, int d) { - data.resize(k * d + k, 0.0f); + centroids.resize(k * d, 0.0f); } }; +/*!\brief computes a random number modulo the value */ +inline int Random(int value) { + return rand() % value; +} + inline void KMeans(int ntrial, int iter, int k, int d, std::vector >& data, Model *model) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); utils::LogPrintf("[%d] Running KMeans iter=%d\n", rank, iter); - // compute centroids - std::vector > centroids; - centroids.resize(k, std::vector(d)); - for (int i = 0; i < k; ++i) { - std::vector centroid(d); - int start = i * d + i; - int count = model->data[start + d]; - //utils::LogPrintf("[%d] count=%d\n", rank, count); - for (int j = start, l = 0; l < d; ++j, ++l) { - centroid[l] = model->data[j] / count; - } - centroids[i] = centroid; - } - - // compute assignments - int size = data.size(); - std::vector assignments(size, -1); - for (int i = 0; i < size; ++i) { + // compute ndata based on assignments + std::vector ndata(k * d + k, 0.0f); + for (int i = 0; i < data.size(); ++i) { float max_sim = FLT_MIN; + int cindex = -1; for (int j = 0; j < k; ++j) { - float sim = utils::DotProduct(data[i], centroids[j]); + float sim = 0.0f; + int cstart = j * d; + for (int y = 0, z = cstart; y < d; ++y, ++z) { + 
sim += model->centroids[z] * data[i][y]; + } if (sim > max_sim) { - assignments[i] = j; + cindex = j; max_sim = sim; } } - } - - // add values and increment counts - std::vector ndata(k * d + k, 0.0f); - for (int i=0; i < size; i++) { - int index = assignments[i]; - int start = index * d + index; + int start = cindex * d + cindex; int j = start; for (int l = 0; l < d; ++j, ++l) { ndata[j] += data[i][l]; } + // update count ndata[j] += 1; } - // reduce + // do Allreduce rabit::Allreduce(&ndata[0], ndata.size()); - model->data = ndata; - /* - if (rank == 0) { - int counts = 0; - for (int i = 0; i < k; ++i) { - counts += model->data[i * d + i + d]; + for (int i = 0; i < k; ++i) { + int nstart = i * d + i; + int cstart = i * d; + int cend= cstart + d; + int count = ndata[nstart + d]; + for (int j = nstart, l = cstart; l < cend; ++j, ++l) { + model->centroids[l] = ndata[j] / count; } - utils::LogPrintf("[%d] counts=%d\n", rank, counts); } - */ } inline void ReadData(char* data_dir, int d, std::vector >* data) { @@ -118,12 +107,11 @@ inline void InitCentroids(int k, int d, std::vector >& data, candidate_centroids.resize(k, std::vector(d)); int elements = data.size(); for (size_t i = 0; i < k; ++i) { - int index = rand() % elements; + int index = Random(elements); candidate_centroids[i] = data[index]; } for (size_t i = 0; i < k; ++i) { - int proc = rand() % nproc; - //utils::LogPrintf("[%d] proc=%d\n", rank, proc); + int proc = Random(nproc); std::vector tmp(d, 0.0f); if (proc == rank) { tmp = candidate_centroids[i]; @@ -131,12 +119,11 @@ inline void InitCentroids(int k, int d, std::vector >& data, } else { rabit::Broadcast(&tmp, proc); } - int start = i * d + i; + int start = i * d; int j = start; for (int l = 0; l < d; ++j, ++l) { - model->data[j] = tmp[l]; + model->centroids[j] = tmp[l]; } - model->data[j] = 1; } } From 69af79d45d7ad07fffb0eb9f698b5eeb195bb4a5 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 18:15:28 -0800 Subject: [PATCH 059/531] sparse 
kmeans --- toolkit/kmeans.cpp | 219 ++++++++++++++++++----------------------- toolkit/toolkit_util.h | 89 +++++++++++++++++ 2 files changed, 185 insertions(+), 123 deletions(-) create mode 100644 toolkit/toolkit_util.h diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index dff12b77c..5811f7fc8 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -2,160 +2,133 @@ // facing an exception #include #include -#include -#include -#include -#include -#include -#include -#include +#include "./toolkit_util.h" using namespace rabit; +// kmeans model class Model : public rabit::utils::ISerializable { public: - std::vector centroids; + // matrix of centroids + Matrix centroids; // load from stream virtual void Load(rabit::utils::IStream &fi) { - fi.Read(¢roids); + fi.Read(¢roids.nrow, sizeof(centroids.nrow)); + fi.Read(¢roids.ncol, sizeof(centroids.ncol)); + fi.Read(¢roids.data); } /*! \brief save the model to the stream */ virtual void Save(rabit::utils::IStream &fo) const { - fo.Write(centroids); + fo.Write(¢roids.nrow, sizeof(centroids.nrow)); + fo.Write(¢roids.ncol, sizeof(centroids.ncol)); + fo.Write(centroids.data); } - virtual void InitModel(int k, int d) { - centroids.resize(k * d, 0.0f); + virtual void InitModel(unsigned num_cluster, unsigned feat_dim) { + centroids.Init(num_cluster, feat_dim); + } + // normalize L2 norm + inline void Normalize(void) { + for (size_t i = 0; i < centroids.nrow; ++i) { + float *row = centroids[i]; + double wsum = 0.0; + for (size_t j = 0; j < centroids.ncol; ++j) { + wsum += row[j] * row[j]; + } + wsum = sqrt(wsum); + if (wsum < 1e-6) return; + float winv = 1.0 / wsum; + for (size_t j = 0; j < centroids.ncol; ++j) { + row[j] *= winv; + } + } } - }; - -/*!\brief computes a random number modulo the value */ -inline int Random(int value) { - return rand() % value; -} - -inline void KMeans(int ntrial, int iter, int k, int d, std::vector >& data, Model *model) { - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - - 
utils::LogPrintf("[%d] Running KMeans iter=%d\n", rank, iter); - - // compute ndata based on assignments - std::vector ndata(k * d + k, 0.0f); - for (int i = 0; i < data.size(); ++i) { - float max_sim = FLT_MIN; - int cindex = -1; - for (int j = 0; j < k; ++j) { - float sim = 0.0f; - int cstart = j * d; - for (int y = 0, z = cstart; y < d; ++y, ++z) { - sim += model->centroids[z] * data[i][y]; - } - if (sim > max_sim) { - cindex = j; - max_sim = sim; - } +inline void InitCentroids(const SparseMat &data, Matrix *centroids) { + int num_cluster = centroids->nrow; + for (int i = 0; i < num_cluster; ++i) { + int index = Random(data.NumRow()); + SparseMat::Vector v = data[index]; + for (unsigned j = 0; j < v.length; ++j) { + (*centroids)[i][v[j].findex] = v[j].fvalue; } - int start = cindex * d + cindex; - int j = start; - for (int l = 0; l < d; ++j, ++l) { - ndata[j] += data[i][l]; - } - // update count - ndata[j] += 1; } - - // do Allreduce - rabit::Allreduce(&ndata[0], ndata.size()); - - for (int i = 0; i < k; ++i) { - int nstart = i * d + i; - int cstart = i * d; - int cend= cstart + d; - int count = ndata[nstart + d]; - for (int j = nstart, l = cstart; l < cend; ++j, ++l) { - model->centroids[l] = ndata[j] / count; - } + for (int i = 0; i < num_cluster; ++i) { + int proc = Random(rabit::GetWorldSize()); + rabit::Broadcast((*centroids)[i], centroids->ncol * sizeof(float), proc); } } -inline void ReadData(char* data_dir, int d, std::vector >* data) { - int rank = rabit::GetRank(); - std::stringstream ss; - ss << data_dir << rank; - const char* file = ss.str().c_str(); - std::ifstream ifs(file); - utils::Check(ifs.good(), "[%d] File %s does not exist\n", rank, file); - float v = 0.0f; - while(!ifs.eof()) { - int i=0; - std::vector vec; - while (i < d) { - ifs >> v; - vec.push_back(v); - i++; - } - utils::Check(vec.size() % d == 0, "[%d] Invalid data size. 
%d instead of %d\n", rank, vec.size(), d); - data->push_back(vec); +inline double Cos(const float *row, + const SparseMat::Vector &v) { + double rdot = 0.0, rnorm = 0.0; + for (unsigned i = 0; i < v.length; ++i) { + rdot += row[v[i].findex] * v[i].fvalue; + rnorm += v[i].fvalue * v[i].fvalue; } + return rdot / sqrt(rnorm); } - -inline void InitCentroids(int k, int d, std::vector >& data, Model* model) { - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - std::vector > candidate_centroids; - candidate_centroids.resize(k, std::vector(d)); - int elements = data.size(); - for (size_t i = 0; i < k; ++i) { - int index = Random(elements); - candidate_centroids[i] = data[index]; - } - for (size_t i = 0; i < k; ++i) { - int proc = Random(nproc); - std::vector tmp(d, 0.0f); - if (proc == rank) { - tmp = candidate_centroids[i]; - rabit::Broadcast(&tmp, proc); - } else { - rabit::Broadcast(&tmp, proc); - } - int start = i * d; - int j = start; - for (int l = 0; l < d; ++j, ++l) { - model->centroids[j] = tmp[l]; - } +inline size_t GetCluster(const Matrix ¢roids, + const SparseMat::Vector &v) { + size_t imin = 0; + double dmin = Cos(centroids[0], v); + for (size_t k = 1; k < centroids.nrow; ++k) { + double dist = Cos(centroids[k], v); + if (dist < dmin) { + dmin = dist; imin = k; + } } + return imin; } - + int main(int argc, char *argv[]) { if (argc < 4) { - printf("Usage: \n"); + printf("Usage: num_cluster max_iter\n"); return 0; } - int k = atoi(argv[1]); - int d = atoi(argv[2]); - int max_itr = atoi(argv[3]); - - rabit::Init(argc, argv); - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - std::string name = rabit::GetProcessorName(); - srand(0); - int ntrial = 0; - Model model; - - std::vector > data; + // load the data + SparseMat data; + data.Load(argv[1]); + // set the parameters + int num_cluster = atoi(argv[2]); + int max_iter = atoi(argv[3]); + // intialize rabit engine + rabit::Init(argc, argv); + // load model + Model model; int 
iter = rabit::LoadCheckPoint(&model); if (iter == 0) { - ReadData(argv[4], d, &data); - model.InitModel(k, d); - InitCentroids(k, d, data, &model); + rabit::Allreduce(&data.feat_dim, sizeof(data.feat_dim)); + model.InitModel(num_cluster, data.feat_dim); + InitCentroids(data, &model.centroids); + model.Normalize(); + utils::LogPrintf("[%d] start at %s\n", + rabit::GetRank(), rabit::GetProcessorName().c_str()); } else { - utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); + utils::LogPrintf("[%d] restart iter=%d\n", rabit::GetRank(), iter); } - for (int r = iter; r < max_itr; ++r) { - KMeans(ntrial, r, k, d, data, &model); + const unsigned num_feat = data.feat_dim; + // matrix to store the result + Matrix temp; + for (int r = iter; r < max_iter; ++r) { + temp.Init(num_cluster, num_feat + 1, 0.0f); + const size_t ndata = data.NumRow(); + for (size_t i = 0; i < ndata; ++i) { + SparseMat::Vector v = data[i]; + size_t k = GetCluster(model.centroids, v); + for (size_t j = 0; j < v.length; ++j) { + temp[k][v[j].findex] += v[j].fvalue; + } + temp[k][num_feat] += 1.0f; + } + rabit::Allreduce(&temp.data[0], temp.data.size()); + for (int k = 0; k < num_cluster; ++k) { + float cnt = temp[k][num_feat]; + for (unsigned i = 0; i < num_feat; ++i) { + model.centroids[k][i] = temp[k][i] / cnt; + } + } + model.Normalize(); rabit::CheckPoint(model); } rabit::Finalize(); diff --git a/toolkit/toolkit_util.h b/toolkit/toolkit_util.h new file mode 100644 index 000000000..71bf888d0 --- /dev/null +++ b/toolkit/toolkit_util.h @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include + +namespace rabit { +/*! 
\brief sparse matrix, CSR format */ +struct SparseMat { + // sparse matrix entry + struct Entry { + // feature index + unsigned findex; + // feature value + float fvalue; + }; + // sparse vector + struct Vector { + const Entry *data; + unsigned length; + inline const Entry &operator[](size_t i) const { + return data[i]; + } + }; + inline Vector operator[](size_t i) const { + Vector v; + v.data = &data[0] + row_ptr[i]; + v.length = static_cast(row_ptr[i + 1]-row_ptr[i]); + return v; + } + // load data from file + inline void Load(const char *fname) { + FILE *fi; + if (!strcmp(fname, "stdin")) { + fi = stdin; + } else { + fi = utils::FopenCheck(fname, "r"); + } + row_ptr.clear(); + row_ptr.push_back(0); + data.clear(); + feat_dim = 0; + unsigned num_feat; + while (fscanf(fi, "%u", &num_feat) == 1) { + Entry e; + for (unsigned i = 0; i < num_feat; ++i) { + utils::Check(fscanf(fi, "%u:%f", &e.findex, &e.fvalue) == 2, + "invalid format"); + data.push_back(e); + feat_dim = std::max(e.findex, feat_dim); + } + row_ptr.push_back(data.size()); + } + feat_dim += 1; + // close the filed + if (fi != stdin) fclose(fi); + } + inline size_t NumRow(void) const { + return row_ptr.size() - 1; + } + // maximum feature dimension + unsigned feat_dim; + std::vector row_ptr; + std::vector data; +}; +// dense matrix +struct Matrix { + inline void Init(size_t nrow, size_t ncol, float v = 0.0f) { + this->nrow = nrow; + this->ncol = ncol; + data.resize(nrow * ncol); + std::fill(data.begin(), data.end(), v); + } + inline float *operator[](size_t i) { + return &data[0] + i * ncol; + } + inline const float *operator[](size_t i) const { + return &data[0] + i * ncol; + } + // number of data + size_t nrow, ncol; + std::vector data; +}; + +/*!\brief computes a random number modulo the value */ +inline int Random(int value) { + return rand() % value; +} +} // namespace rabit From a1a1a8895e3c9d5301c678c217d3100985c4ee96 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 18:23:58 -0800 
Subject: [PATCH 060/531] add kmeans --- toolkit/kmeans.cpp | 9 +++++++-- toolkit/toolkit_util.h | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index 5811f7fc8..c8884417c 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -81,8 +81,8 @@ inline size_t GetCluster(const Matrix ¢roids, } int main(int argc, char *argv[]) { - if (argc < 4) { - printf("Usage: num_cluster max_iter\n"); + if (argc < 5) { + printf("Usage: num_cluster max_iter \n"); return 0; } srand(0); @@ -131,6 +131,11 @@ int main(int argc, char *argv[]) { model.Normalize(); rabit::CheckPoint(model); } + // output the model file to somewhere + if (rabit::GetRank() == 0) { + model.centroids.Print(argv[4]); + } rabit::Finalize(); return 0; } + diff --git a/toolkit/toolkit_util.h b/toolkit/toolkit_util.h index 71bf888d0..e1ccc7003 100644 --- a/toolkit/toolkit_util.h +++ b/toolkit/toolkit_util.h @@ -77,6 +77,25 @@ struct Matrix { inline const float *operator[](size_t i) const { return &data[0] + i * ncol; } + inline void Print(const char *fname) { + FILE *fo; + if (!strcmp(fname, "stdout")) { + fo = stdout; + } else { + fo = utils::FopenCheck(fname, "r"); + } + fprintf(fo, "%lu %lu\n", nrow, ncol); + for (size_t i = 0; i < data.size(); ++i) { + fprintf(fo, "%g", data[i]); + if ((i+1) % ncol == 0) { + fprintf(fo, "\n"); + } else { + fprintf(fo, " "); + } + } + // close the filed + if (fo != stdout) fclose(fo); + } // number of data size_t nrow, ncol; std::vector data; From 8175df1002fe39eeb0f608931a5a7ae0d82517e9 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 20:05:16 -0800 Subject: [PATCH 061/531] bug fix in kmeans --- toolkit/Makefile | 26 +++++++++++--------------- toolkit/kmeans.cpp | 4 +++- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/toolkit/Makefile b/toolkit/Makefile index fc73737b7..4d96e6bab 100644 --- a/toolkit/Makefile +++ b/toolkit/Makefile @@ -1,7 +1,7 @@ export CC = gcc export 
CXX = g++ export MPICXX = mpicxx -export LDFLAGS= -pthread -lm +export LDFLAGS= -pthread -lm -L../lib export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src # specify tensor path @@ -12,31 +12,27 @@ MPIOBJ = engine_mpi.o OBJ = $(RABIT_OBJ) kmeans.o MPIBIN = kmeans.mpi -.PHONY: clean all +.PHONY: clean all lib -all: $(BIN) $(MPIBIN) +all: $(BIN) $(MPIBIN) + +lib: + cd ..;make;cd - -allreduce_base.o: ../src/allreduce_base.cc ../src/*.h -engine.o: ../src/engine.cc ../src/*.h -allreduce_robust.o: ../src/allreduce_robust.cc ../src/*.h -engine_mpi.o: ../src/engine_mpi.cc kmeans.o: kmeans.cpp ../src/*.h # we can link against MPI version to get use MPI -kmeans: kmeans.o $(RABIT_OBJ) -kmeans.mpi: kmeans.o $(MPIOBJ) - +kmeans: kmeans.o lib +kmeans.mpi: kmeans.o lib + $(BIN) : - $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) + $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit $(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) $(MPIBIN) : - $(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) - -$(MPIOBJ) : - $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) + $(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) -lrabit_mpi clean: $(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~ diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index c8884417c..24f784923 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) { Model model; int iter = rabit::LoadCheckPoint(&model); if (iter == 0) { - rabit::Allreduce(&data.feat_dim, sizeof(data.feat_dim)); + rabit::Allreduce(&data.feat_dim, 1); model.InitModel(num_cluster, data.feat_dim); InitCentroids(data, &model.centroids); model.Normalize(); @@ -121,7 +121,9 @@ int main(int argc, char *argv[]) { } temp[k][num_feat] += 1.0f; } + // call allreduce rabit::Allreduce(&temp.data[0], temp.data.size()); + // set number for (int k = 0; k < num_cluster; ++k) 
{ float cnt = temp[k][num_feat]; for (unsigned i = 0; i < num_feat; ++i) { From 9abe6ad4d8d84504f7bf823f68415fe58c78cfeb Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 21:30:11 -0800 Subject: [PATCH 062/531] checkin makefile --- Makefile | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..bbed21c81 --- /dev/null +++ b/Makefile @@ -0,0 +1,35 @@ +export CC = gcc +export CXX = g++ +export MPICXX = mpicxx +export LDFLAGS= +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src + +BPATH=lib +# objectives that makes up rabit library +MPIOBJ= $(BPATH)/engine_mpi.o +OBJ= $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o +ALIB= lib/librabit.a lib/librabit_mpi.a + +.PHONY: clean all + +all: $(ALIB) + +$(BPATH)/allreduce_base.o: src/allreduce_base.cc src/*.h +$(BPATH)/engine.o: src/engine.cc src/*.h +$(BPATH)/allreduce_robust.o: src/allreduce_robust.cc src/*.h +$(BPATH)/engine_mpi.o: src/engine_mpi.cc src/*.h + +lib/librabit.a: $(OBJ) +lib/librabit_mpi.a: $(MPIOBJ) + +$(OBJ) : + $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) + +$(MPIOBJ) : + $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) + +$(ALIB): + ar cr $@ $+ + +clean: + $(RM) $(OBJ) $(MPIOBJ) $(ALIB) $(MPIALIB) *~ src/*~ From 1c76483b4b1530aff12ce7e9c8d8758748a0b32f Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 21:53:34 -0800 Subject: [PATCH 063/531] ok --- lib/README | 1 + 1 file changed, 1 insertion(+) create mode 100644 lib/README diff --git a/lib/README b/lib/README new file mode 100644 index 000000000..c734c76ab --- /dev/null +++ b/lib/README @@ -0,0 +1 @@ +This folder holds the library file generated by the compiler \ No newline at end of file From 63bf9c799506a27b40125a68a763622395516432 Mon Sep 17 00:00:00 2001 From: nachocano Date: Wed, 3 Dec 2014 21:58:17 -0800 Subject: [PATCH 
064/531] open for writing --- toolkit/kmeans.mpi | Bin 0 -> 32188 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100755 toolkit/kmeans.mpi diff --git a/toolkit/kmeans.mpi b/toolkit/kmeans.mpi new file mode 100755 index 0000000000000000000000000000000000000000..06826d7e755e87f8eb1b0203de5af6da99634c6a GIT binary patch literal 32188 zcmeHwdwf*Ywf~tsAQkisYAN+n85NW$+8GF#@X!oo@C?kvNQhDtCPOkI(LCnPBOr<$ z2~m#YXuOI)sN946NboF)zlbFHQ30{GKcfq%1cXxXS7$O>ygK^x~j_KTU+Jx zhdd$gDkHyc!EcLbFX0*T>5N3|@l*vvfx5=kNj$TEE9Y_fW(j_Tr^{#L?eT;g*VHvu zd+Hi%n!wX2-A@g}sz z<0-K(wWpC4b3T$;qycA-z{!q@uikc#r?K7>ytQFfQ@zI@3Y?Ok&Q2=Ml#A?*#-;wC zUz<`^GE_cuF_%xb6T;KmN&FV+%n*A#_1@FTw;koFON)i_2~X!o?e!q&kH=H(4S98J zsC?1_&TpCCnZR{jrAnos)V$wShTYYPNHE??V1%c$)IHuy$Z`g{KJ%7dn zpUqpoyY>oOdT~}hv)(+E&l|pcT3M3U znqIH87FeddRhM0sEO9Y%NiY5tyr1#~9tTd(WeAi{xIRWGedHO@z=#G$G%%up5eQhz3S9a4HQ{DC+z68|>HHE9^_ll{-Hw#K)#xJ!x+_ne+W|N^D*|KFG8e zFOsCbbMOgcODO6QZ6{Er%h5wT5qFC?F5>MX?hrBiJW-;-JSDciT|0*gM{hH01pw66 zk4`2MKqqKZ82TpA67`s(zOB9cEfA`Ybufb5&@^BzakHYfZl@Hr)t^Ar<=7%>X%q1# zinaHta5PVeIwvTxZH!dyO6*r#H56`nj^0w}3hntMnN?x=ooMnHd~4a_>dVp1M5f(z zAsRm^oA{miHa@$k70=VB{?qhbZr@IChi$L60lcEDJyCZ%>7*Kbmy zjy6TzspKD5I*(^5>dQ+0!RB+oZIdc7cT80}2f?OOIrf_JH0q+~YDHG|DJHa{AVwl;yDpFoSeJ8X%Xnwl%*NgI(yVyx)U-k?(XN0iR~>>b3`^x6-OeXdmP zq$18}!5(L{WKUkvK&S{p$CaoXZL5el<)>cFA5dbUV>xO3j>zUw(K{get4HKwkOP=~VnmF!= z{JjH5yO~ z%gbM(hPAwN;%Sz7Nsa7(U9zE!=nvUE`9b-6@5$pPKd7|q1k?TDj}&#EQUp`z3Z2)6 z$|pXE+9Acbi4RiMMLppUk*6f~xJ9Q+|)_WFC^27mDqF3K&GgB z68n#~5@-s2Ly2y#1=K%UiEaKK<;Lc{IfVvCiF#BknFNNsl(yX>NhQ&Sc9;63M08!P zWr(On0I7Nb77H-S#^Ib+8t!uJ!(7l+@u@&sKpq?U>!0>S>5Qu67cv32J3iMr+X{JH8XJ?9ZG&T zhIzJ<=tgVvkSK4w6vtgkG>mcQz_{DYhIX_e0%3Wpo*SgLaM4}S3oznx6m{DKly%tN zfVo`WhtZX^uhMAAYk6+9)1CiLjeJAg(YiOZ7%e37MX`zRU@(>N9C=cNIx!kN0Ul%I zsr%*e2jo5b-z5X=kjK9!?|JQAn4!G<_cih|SoE&E#NP0Jm&yjA(sDdY-arcmDsZVW z3)tJACw}tA-FRb|bl1q=jAK}IirPzKbKe0tl_=G2M}DuvcGSMZ1Y!)w)O_Vqe~&rg zEy%tXvbxpdl$(~1!M5KX4PXL#`-ojOpMkL_kAH)Nh2WDR_&f5RcOdwC^75A;_&#UL 
zP7=Hu677dXdm+(24B*bg@``S^e9foMM5nU|Ia52s8~exBw28(*IG6m?Hc~yCF=0~F zMy2&r7oUbgGq@^|A$0ozvkyp-4V}77M~}$Ma(+jXQj za+kdb2qdmKkC8RF)!on$8H?OH9d)~+Z2a|`ll_~t;mvG3@WqEMU-s=-(sl`|# zHYs88(V)FHW4tU*UHx25-wOWltVzHd=1C!hK!=Wc4!_7f;ZUG2PJ_cK&9 z6u^LH3YDMQsYD;+Q^3SAWVov4j!pZqGdA%qS5d!PebKFUulcr;&87(_2B|ZabC*)o z$@ny1iutu0lDwm+UnuH8O`B{!M_##K9%p8oi_u;3e#JbSy^R{@GW~hZW>T2kYGcihtwrQ+-79Z=2ZCduVxc)IZ~P-e zR?tGTjasCrpV|`gBn-sA#j$AF;$nQ&$W5rqoP{Ypjs*oH0)r_L=W~B|pci%-hdmyW z@7_RMTKDcCZXqu%S9zE}V6WX6$5>c8(LSZ6S1c^&vxeLD%I0&U^7qi-bEABb>ZbJs ztHwb{#MhItw4Si`^7Z5(ttX!>(`OqSp)5!T_>52-QH)($9Ai7vw|Jo8anUerHpH^Ot$8V_B8r29qE*+_D# zd)(0L=D7UZL1b+}mW>pt{d|HX^`D_cbJt;0<%(Lx6p39*1azfD_l7O*Qjcl-LCGf3 zB1cPhXFM3&fP!@7S+IY!-iAoPtFED*SVbWPg}a9hBh zj%upTE$YS&t&^=QMZXU`1xW@j0LdI{D35A&f=-)tCRNQ>m)1lmUtg|@21D4dD%kUV zt{mG>+XC9msWBS6nh(teVfyx$wv{M<5!+j$FgA=s3cK61%UDTl)dX#~B4`Jb6%DeA z+}&D&wPXEOownnYv>n}5PlAXzI@IFQ5(-}Bi%jC_} z3KmANxEOujhLKNN8B)HYjjBJJ6?Q-|oE(bb(2n@Kj(oUtk8W(7Ou*y6n8C)}I`6GG?VQ-!r_w9$hsFQ5l$9l<0z~`WFf+5ML~ zxp!-qGCo=ILp1n_;1litu$?SqnzO(fx(xlZgII*dzy#uoO8S_J=?)EAJhgv9`uc+} zm=2fvfwln*qKgoQ$<&+=xX+fdD{fY37ro8#KhJt-8PP=P$?x%i^ z6>P@fm9$FY+_F3G#udA;o-zBro=y@5kO?_A#zWtg^IK{EE$7E*c+2^l%UJYSC5s;S zv#22==ZD+m{PpeFQ?aF$?`!nY|9xPh5pc#*!u0ylRMOQ zK)Zx=7HgFlt)o|oIbiROLtTzPS|JclE%Fc@b!<01)j=3;l<_YGE1PqYgv;@q4)$Z6Gu+@rQXS8&J zGrACF-KFIJLFr83$m5`rf3o>3Y$vq?=yReH7X5&jlKfaLURI7BR9Zg30=mbk;=2y3 zwoG+LuT(nU1rrj5td_4>$^9~uANJew5zJ<|29#kkp(dqTP?kpmIxkx|fuG309;4Z4 zl9p@|m8sB31e{Yqgl1538IaSm+sv#brSIfQocXs3*@IZ8E!5P02q*mAHxq zf?j8Iw35F|>HH$gWjcaG38k~|s*p@-`PMkH$k9~Bng60o?PI$U{g7x94iSROocq6Y z7QN+?=l8p#tMi=syPcgUvTS?ZtsiekS4m-4|8i`2Td_eS=^U!TCnwW$Z^| zV>e+r>T#PsD2+|)a7HINqc`L!`L8RO@;dvnocS-6#tOiIofBRtOOji?(ycZ@%Z?rh zVpdw;2#rS<^A6UxC4H00T3Da!NxRtiquFYZ1W=EB-Lq9R_YHBxsJaJ=Hzu$mMFNEjWwo>iG+21W))kM6!ldcDQTV9l)gYcr~R0=r?g|kQU_UHC-Jb4@w4PEt?&y> z;^KTtAMfn^IBBn=$z-{4;_L$ZK{C#Hp)q8f>%ff}=V}<|Zu%7K{5(5(f+>@6P9|G) z7QG4Ud=Ewp>wMAK`DHd)=U=#Weye|q^I4J=&!jXG7~1<#jHH#I7Vw6leM~r8u*PSB;F+e5u58=fR{?`w#MGz?;$Xk);I3Ln@X& 
zY~gbE(?%tVEpmG$R@M?VZf_aPZoU|YnlEF%w8gb7sxF%QZMN$oSAzp5%enhdtiMr- zomoV?)~cL46;pSj7a5-tKjxBPbQ_3nA<|#VO#dmRGc%phk4O41GSgQP8ZE}g z&T&XR)bd3(R*Tp?n$jVn2cMh$6KGJr_YIK6&ioA-cbZU3Q_SMvO?w$8)ZEPU_bHu( znw**bCrT%w#%88JhIAnmW{`Qb_i@D>J+7EL{yf{JsK-mxPwk06qPr}#T@~MfUD_PT ziFuxmVVTzj1D3XP6$lLotq6}Hq<gAs&ynvODDQ# zjKcQ6hnQVa@0SSsFziQG^j!+IrR>GHUs=&dmk(3XddcUUoOF(nogR!wZ~I( z97c}}D@VrupX}Sf53V0qlHuGG!=IdnVaERIbo_pVjh0Z63aW_^pwdPd3*!s>kbJ6dkc=&|&7D`1O zC&aYE^O3iruN@hDKdvzRhbg?J3;EO2@Q=){Xa%;i*I?_n4kLB$WJNtjg6-5k#^qbi zJZX4OwXo=_`Q_PCKCXkBr zM16i`G%@#JQqJp0&ZuKhRGh&N^+)m9L@MJ=<4Sk2F<^(pzf3lg**6-9Iq8cMrjv!F z=K)16m`p>LOx>mKb*R{wb!roMr8JCxO`8hiFrI+jQ09J<_~UB=HCR!DE4qR-IRU54 zmZ)PYpCi_5ihBEG2oRcVR!b)1l#QxpqZT#0)kAJ|zZS`5&DBtI(Qq|e`5v6_2g^{E z5+!@XO62DyN#+Dw0s};NL>M^H!!d2D3gTANed*IkRN>>-=Px#5~gPH>7I4E*PZ|7<#QBOuI zc9*DqF6<6nQFo8_pZI3SDgGTeazqc~jEC>=lk$@*RYeBQO0cvgejomXw%IMar>4#z z`VN9ehP|-yvK*=DkOyltm!^jX zR(`O7T?9X@=>p-nUE0dOCFnLl+W8gJH{8CA z%Zk1~gK$Vb{B!8C0M<|?if`;Y^N3sY%mW)fqS{|BNl8qgN!_dqHKCKRWME$^1h@^J%BEG7lF~Bp2=M}kIgZ}&I2SxIckU+k;odz;z&Z1z5vy~nfn1ol3Uy(hBw`Rr|B?+e*`GJBJr5>uwKH|?L-^ z73^(g@9FGaz}~aiyO6zSv-e!~zKXqz*?T^Fm$3Ij_Ey+?5qrDXyOh1l*n0_kFJ{whc6`wc;L1pShrwFI>jbQeJn z6Z9}a_Y(9of;JHJD}vS%)JaeyK|KUj6Z95A%L)3BAeyRb2MJn8&}i6R-&}&m6EvNm zsRT_WsEnZV3GxwiE?flTzQy z1WhJrH$k%rdYYheg2>ZXdkaBN5Og;|KP2c0g8q#lIu+{MM9^Oex`UuIF=6$E2)dl0 zn+YlQ1pSmC z8#YIMTL?n8NqzSvp^ZssT@vyW4|;x8RU_%%WcAu0%?h}m;4 ztN}`sR!~j^$PI{B5u%|try*u#s2lOgBCV#JtAJ3JC1o9e~u;()j>nb2D zhXy_^W%m%-&3NHRPPz@T=~le{#!KjbotJp@St+}K(qz^s0f2?{ZAzGk7xA$oHqpq# z`pqB8#SHW4u85RvC-6`7qK5$NRi*a{c@H5%06kSG1V@n+qrwrq;1T0bN{%=6Bmzi| z4=AAz0gHun9Km!LFM}L6PN`mD35L9Zki{FaObMPLv6O(1r`3ggfq7Ffc?Lt(P2rHV zMw(I|wji7#O<65fc^fT_O(9E@-`8lVsjK%{E}Rm?dO>VZpsrC0NK?Z2fA*9bsjjgZ zhE{EKFB>=b9S*&VWEDlHnYv)Q@Bn?HBPlZsBoTK974Z=5fcRJiZskL-Y@e zxLw4*6!C9F{Je;}MSMWSZ;JQ>5f6y?xQKI#IRCRmJVC@4ig=ocXNkC2#AJUY*CK&0 z74b?D*NQkG;@d@hmx$X${D_EuCgT4T@pcjK6!A+UJ}BZ|5q~J+zl->!i1TK1`OX&c 
zL=j&sVylQ}i?~F@r6OJ?;#DH97jd(QBO<;>#1D!1Mv9QB-VXu|KelR{+Iel6$GnCiUi$(fW!KYW)cSa>?h+pIv zU%~m8iMaQ2j#mo2L&VZ|1)Yf7MLZy4t5xKSxKhM^5$jdcAG?ztIz-$nVzZ5>TSZ(c z;x-Wv=;_!{Q2BNdcZj%G#BDP~zKAXh|BbJQBQ}V9=$zndjE@h1^GxcqJa?& zjA&p)10xz3(ZGlXMl>*@f&Z^GFz&+2iiMTR{H1okxqQJ&d!^a#&$|I(ITDuO#IM*x zKWXQ!y2Vpfdy8jv*c+&pt_!r8+IU8_FA(rcI#Sn|f^nim2>oUi`;9%%D*UXQFCaze z>I!>IZD)hYa&`o2vo5&S?e^So?GjgsLyF`PN1iid{ zRdT&n*CUw`GjIUhEngmPaMYZpJS)7^DURSHTXIE0>P$T{ zn3bNE>_qA&9P@J}=oF_txsUV+a@MWrHfQEZed;7WB6Mw}NI%PuL=4=@^+ssPKI3AQ z57QW)mfo1HMs}xnLqs|ZjW7So#uz9v8e?Xna@&ezn#QRtOA2Tvnp!IOk3BB&1;hA- z#x^N1z>+*^^Zl7;f#P*WWtn+Wl(tqT#-Mo3LneN&Ke+``Zr zoOst#ZaYKiM4hm$4D;g$EJHPfcW^u$sv>N=js*#eGc1Thc}ChzXoIhz%6}{6_7Lh1 zhZyN0LU0gXO*y@U1a(YfSSV1{fY}l46=*$|XOPh5U}H+6P1&hQa=O*oSQldcB%f2C zNrIS+z^EGo$TD&kFdwA(py5Ewu&{BQc5VX=PY7D4{z&0o{Zt5F%1e&#*a|&A9l7 zGP3p5UOkmhHlBL=l|kaB=^5mlVDj@;4H(iJLv^deO<~qkgPD1(Vfd-aK)}m-HIgH= z%31R4T3X1~`dH^~8kV5hR~2dskW6jE<%{y3RS=zNXv=U^@DV`|td{oS@`K@3NjbM> z5WtP&3;8fQ@WY~zp(8Uh%nWaPM&fGNhfW?Jl7pY14fz_mX?G9F^))v8>M@{LhYrm} zcf%nBF%AvM)9tW#NKPma_AyK0MYRlqH1!W;WCYV%F*qbguPu_B-lw`%ZOTYZ>p(rd zqR}txc}r$ehDEni9$WRX+LJt3*gde%`c6L1&f2fwUsXfx!aPqy)z-5B3*zAxGKbDlGPx?!HEz4Qv99e+?WA1$F;tyFa}61 ze2v(VPF*h9$gh|!_2AS$ORR-b@lt7TmXtMQTO)adcV2qRBxQZoCgpL{S8aoSY0A#& zaZ1@a1B>8@FQB>DQCC)0C2h+p)F*SO;OVR)%;MrUK(q8hmd%>EDTJ_&X3k%GTAlD;o``vT0kH z9og(M#Pi4+XogNAYmlG5+xT;KYOD1ZP_?ZARR>X(N`u)#vTZp@f$qs+0(DZ#@B%%R zqYLz2PEO9&yG&9}PMqG-A-prBdo_ni_ZH{|HZeN7Ns*P3D3OycYilm}-NWecW>3$> z1Lwta+D+|Q=5~`cZi<)&vf^3o_|IxK$Fqygc*^K0wg9qPtrn}b(qC-0c9zgWm@9)|;VwrPO43aTIS3JyK}0_V%;?C)PIP0^e&QFX zlr88c119Npx;)UapVm#I`AS|?!TF|$6t{3@h#0Q>*WgBFxbDw|!Et)jPL=!h{3#sY zB5>VL*C}w_?|WF_x}TVCkx>1*f7mZ@-9P+Wf$RRaw*{{IqdpP1?w2|#aNU1LmuQH; z?#H8Fl_6aBbG^Lq=e;a&-Ou->z;%D$Md(rDulxP%0xz?2{nQ9t z_XD;G+%NL~L*TkU@Q}beME+=43h~$dgEoPibw8lMtpdMS;JV+Cem$DXw~73Z1wJ5f zORmt5@EaBhT=y$37r5?UTqkhd&-inJ>;A?+>g9#M@H4%XGw_6z^wbpqG@k1Yb%{gC?vuKOd;%H!pA zzvNtjn}t8JO5nPmvQ6N+zw&p$&w>v0-}B!qaJu^f)0OA~GUd}97{WghINgOI+=NR_ 
z=Z}p@`hATv1+L%MxKQBwJr0||drEluGT<`wt>6EM3S7SjvWMXkyN5&dU4$D|G)8+a zp4!`;Q+Ixb8RpHQ^>RTE^?| zVz|V9r=8mSU%(M#l=9~d1AfGSe`&zSL6_;}Ee8B@1Ae6eUu3}PeqcJke>LEAF)%&< zAp`yk1Kw%C-!R~RH{j>sPe;=EPcz^P4fst4{AL5b-he-3!2iR5#|`)^2K-|KPJil> zF7MfAW#Ur|c!2?T81U;1cn!x<1%H}3j#Yy{_Zsjg4frz#e6In2)quZmz>gU4ESa@0 zS^wDv{2~KB*MMKkajY8rx!-`lX}~YR&_Uyq^z{aOy#fE30e_j{k!1OU2K;RU{=NbK zh~Xpxe?H@Ql3(7qO!>~|c#_^~z!wfIrFcq&;+VJUO4eYoON*_$LPZGXwsm0na^~={u=kvjN9)%Qn5#TUh$o<`zqm zgwGM_K7)s-f?~8v>7nS?H zfuPUr4HdYWyw!A8!*ki+9Td@F0fQVj-<(#8f9V#vnpT&wqirg-4*BFHYFy3Eqww!& ziBps`e!Pw)8Qy~{o#i2%dp0;2-D3WQT~O|A_Bl>R$PcDw@v|*wP3GwnygH6TkH_oB zF?_Y7VTj~27u5QyZgH#7RPJwVFv4Q zn&$V0YMqN&cJq>@9#(s11r3?vHk^(RGLu{uXZ&@C?aBrGz?jZY`H{1uj@3qFI7whf zTe_cHi<0=ls9reR8EhHBQ9p(xZ8YHt)Hn|=e zLyjf&I3pXbqM1aLcJ!P^nbEPc7Qi=xQ##&RnKc!V*8}6NxsH6EbP`wYDIj_=eli*H zL*@T2c@gpEjrhu?uH=~=GF=NQxus|67~-_bZoYglq*Dxz0B@nW<#D22sK=H+NP6D z=nfDjOI=xg8BSA&Ybhhv{UMGeb+9x3|1iX?X7Wim%Bq%dJ!IHMVL2)mk}V!)T44FS zyjT;&tin%_`C%~kyO(>C(}u&bipiqWirWk{=`h7$7P3uGcdU_Z4&{<+4;3hI2GiYz zq9@Y*=`$V7b&;IUdESt}aw|%nXr~`&!%T);9^j+tRBYx4uvS)igYa!MH*npag49+> zOQ^jDtD%Gaqlk4)jT!O}k6!%#FelLE z7W#)J>wVTCDYl_WvxX2ASjo7SUN4UF8Em{cl1ha6F;l>um{UlYZYe{2IbY?QNp;|m zqF?3sNfi}+|8@FnhA>Za8x6@zxpszRr8}60WM=rAkej?}!{F4#loXnpW>_EUcVtd& z*~UvUDNhP#db&Pcs$ZHR(zJUrDbyg2euw2W;^-uz%QD3=WTtz1Jf5L$MGx~hX5PtQ z7_BIV%NQ)3W+I6}hU*u}G`5{glCTRGERR-io}96c50k|2RwN6hq3M%8FPMIff|U>n z`ertrif?qq#fdaxp04j5hn5gK#i1FQcO-^z)GtR2$6RyiozJhWn+%UP&EFcB*Tr zpk!AADW-d-h+)7i!W~nbN$|UVbU0zS!hxh=Zl5C6a2qfqh2KzSN&Idy2kB-qhuFPj zesd7p6`FCyT|rL`za;3XVYdT4HR2K;0J;yzq97uE15m&EcbY4{r@jZoFADM-g5qi* Y-QA%(ffUnaz_eR{L#|Nc42_=u3xSvb)c^nh literal 0 HcmV?d00001 From dece76708475e3d4f78017abf9aaaf43858c0282 Mon Sep 17 00:00:00 2001 From: nachocano Date: Wed, 3 Dec 2014 21:58:33 -0800 Subject: [PATCH 065/531] Revert "open for writing" This reverts commit 63bf9c799506a27b40125a68a763622395516432. 
--- toolkit/kmeans.mpi | Bin 32188 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100755 toolkit/kmeans.mpi diff --git a/toolkit/kmeans.mpi b/toolkit/kmeans.mpi deleted file mode 100755 index 06826d7e755e87f8eb1b0203de5af6da99634c6a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32188 zcmeHwdwf*Ywf~tsAQkisYAN+n85NW$+8GF#@X!oo@C?kvNQhDtCPOkI(LCnPBOr<$ z2~m#YXuOI)sN946NboF)zlbFHQ30{GKcfq%1cXxXS7$O>ygK^x~j_KTU+Jx zhdd$gDkHyc!EcLbFX0*T>5N3|@l*vvfx5=kNj$TEE9Y_fW(j_Tr^{#L?eT;g*VHvu zd+Hi%n!wX2-A@g}sz z<0-K(wWpC4b3T$;qycA-z{!q@uikc#r?K7>ytQFfQ@zI@3Y?Ok&Q2=Ml#A?*#-;wC zUz<`^GE_cuF_%xb6T;KmN&FV+%n*A#_1@FTw;koFON)i_2~X!o?e!q&kH=H(4S98J zsC?1_&TpCCnZR{jrAnos)V$wShTYYPNHE??V1%c$)IHuy$Z`g{KJ%7dn zpUqpoyY>oOdT~}hv)(+E&l|pcT3M3U znqIH87FeddRhM0sEO9Y%NiY5tyr1#~9tTd(WeAi{xIRWGedHO@z=#G$G%%up5eQhz3S9a4HQ{DC+z68|>HHE9^_ll{-Hw#K)#xJ!x+_ne+W|N^D*|KFG8e zFOsCbbMOgcODO6QZ6{Er%h5wT5qFC?F5>MX?hrBiJW-;-JSDciT|0*gM{hH01pw66 zk4`2MKqqKZ82TpA67`s(zOB9cEfA`Ybufb5&@^BzakHYfZl@Hr)t^Ar<=7%>X%q1# zinaHta5PVeIwvTxZH!dyO6*r#H56`nj^0w}3hntMnN?x=ooMnHd~4a_>dVp1M5f(z zAsRm^oA{miHa@$k70=VB{?qhbZr@IChi$L60lcEDJyCZ%>7*Kbmy zjy6TzspKD5I*(^5>dQ+0!RB+oZIdc7cT80}2f?OOIrf_JH0q+~YDHG|DJHa{AVwl;yDpFoSeJ8X%Xnwl%*NgI(yVyx)U-k?(XN0iR~>>b3`^x6-OeXdmP zq$18}!5(L{WKUkvK&S{p$CaoXZL5el<)>cFA5dbUV>xO3j>zUw(K{get4HKwkOP=~VnmF!= z{JjH5yO~ z%gbM(hPAwN;%Sz7Nsa7(U9zE!=nvUE`9b-6@5$pPKd7|q1k?TDj}&#EQUp`z3Z2)6 z$|pXE+9Acbi4RiMMLppUk*6f~xJ9Q+|)_WFC^27mDqF3K&GgB z68n#~5@-s2Ly2y#1=K%UiEaKK<;Lc{IfVvCiF#BknFNNsl(yX>NhQ&Sc9;63M08!P zWr(On0I7Nb77H-S#^Ib+8t!uJ!(7l+@u@&sKpq?U>!0>S>5Qu67cv32J3iMr+X{JH8XJ?9ZG&T zhIzJ<=tgVvkSK4w6vtgkG>mcQz_{DYhIX_e0%3Wpo*SgLaM4}S3oznx6m{DKly%tN zfVo`WhtZX^uhMAAYk6+9)1CiLjeJAg(YiOZ7%e37MX`zRU@(>N9C=cNIx!kN0Ul%I zsr%*e2jo5b-z5X=kjK9!?|JQAn4!G<_cih|SoE&E#NP0Jm&yjA(sDdY-arcmDsZVW z3)tJACw}tA-FRb|bl1q=jAK}IirPzKbKe0tl_=G2M}DuvcGSMZ1Y!)w)O_Vqe~&rg zEy%tXvbxpdl$(~1!M5KX4PXL#`-ojOpMkL_kAH)Nh2WDR_&f5RcOdwC^75A;_&#UL 
zP7=Hu677dXdm+(24B*bg@``S^e9foMM5nU|Ia52s8~exBw28(*IG6m?Hc~yCF=0~F zMy2&r7oUbgGq@^|A$0ozvkyp-4V}77M~}$Ma(+jXQj za+kdb2qdmKkC8RF)!on$8H?OH9d)~+Z2a|`ll_~t;mvG3@WqEMU-s=-(sl`|# zHYs88(V)FHW4tU*UHx25-wOWltVzHd=1C!hK!=Wc4!_7f;ZUG2PJ_cK&9 z6u^LH3YDMQsYD;+Q^3SAWVov4j!pZqGdA%qS5d!PebKFUulcr;&87(_2B|ZabC*)o z$@ny1iutu0lDwm+UnuH8O`B{!M_##K9%p8oi_u;3e#JbSy^R{@GW~hZW>T2kYGcihtwrQ+-79Z=2ZCduVxc)IZ~P-e zR?tGTjasCrpV|`gBn-sA#j$AF;$nQ&$W5rqoP{Ypjs*oH0)r_L=W~B|pci%-hdmyW z@7_RMTKDcCZXqu%S9zE}V6WX6$5>c8(LSZ6S1c^&vxeLD%I0&U^7qi-bEABb>ZbJs ztHwb{#MhItw4Si`^7Z5(ttX!>(`OqSp)5!T_>52-QH)($9Ai7vw|Jo8anUerHpH^Ot$8V_B8r29qE*+_D# zd)(0L=D7UZL1b+}mW>pt{d|HX^`D_cbJt;0<%(Lx6p39*1azfD_l7O*Qjcl-LCGf3 zB1cPhXFM3&fP!@7S+IY!-iAoPtFED*SVbWPg}a9hBh zj%upTE$YS&t&^=QMZXU`1xW@j0LdI{D35A&f=-)tCRNQ>m)1lmUtg|@21D4dD%kUV zt{mG>+XC9msWBS6nh(teVfyx$wv{M<5!+j$FgA=s3cK61%UDTl)dX#~B4`Jb6%DeA z+}&D&wPXEOownnYv>n}5PlAXzI@IFQ5(-}Bi%jC_} z3KmANxEOujhLKNN8B)HYjjBJJ6?Q-|oE(bb(2n@Kj(oUtk8W(7Ou*y6n8C)}I`6GG?VQ-!r_w9$hsFQ5l$9l<0z~`WFf+5ML~ zxp!-qGCo=ILp1n_;1litu$?SqnzO(fx(xlZgII*dzy#uoO8S_J=?)EAJhgv9`uc+} zm=2fvfwln*qKgoQ$<&+=xX+fdD{fY37ro8#KhJt-8PP=P$?x%i^ z6>P@fm9$FY+_F3G#udA;o-zBro=y@5kO?_A#zWtg^IK{EE$7E*c+2^l%UJYSC5s;S zv#22==ZD+m{PpeFQ?aF$?`!nY|9xPh5pc#*!u0ylRMOQ zK)Zx=7HgFlt)o|oIbiROLtTzPS|JclE%Fc@b!<01)j=3;l<_YGE1PqYgv;@q4)$Z6Gu+@rQXS8&J zGrACF-KFIJLFr83$m5`rf3o>3Y$vq?=yReH7X5&jlKfaLURI7BR9Zg30=mbk;=2y3 zwoG+LuT(nU1rrj5td_4>$^9~uANJew5zJ<|29#kkp(dqTP?kpmIxkx|fuG309;4Z4 zl9p@|m8sB31e{Yqgl1538IaSm+sv#brSIfQocXs3*@IZ8E!5P02q*mAHxq zf?j8Iw35F|>HH$gWjcaG38k~|s*p@-`PMkH$k9~Bng60o?PI$U{g7x94iSROocq6Y z7QN+?=l8p#tMi=syPcgUvTS?ZtsiekS4m-4|8i`2Td_eS=^U!TCnwW$Z^| zV>e+r>T#PsD2+|)a7HINqc`L!`L8RO@;dvnocS-6#tOiIofBRtOOji?(ycZ@%Z?rh zVpdw;2#rS<^A6UxC4H00T3Da!NxRtiquFYZ1W=EB-Lq9R_YHBxsJaJ=Hzu$mMFNEjWwo>iG+21W))kM6!ldcDQTV9l)gYcr~R0=r?g|kQU_UHC-Jb4@w4PEt?&y> z;^KTtAMfn^IBBn=$z-{4;_L$ZK{C#Hp)q8f>%ff}=V}<|Zu%7K{5(5(f+>@6P9|G) z7QG4Ud=Ewp>wMAK`DHd)=U=#Weye|q^I4J=&!jXG7~1<#jHH#I7Vw6leM~r8u*PSB;F+e5u58=fR{?`w#MGz?;$Xk);I3Ln@X& 
zY~gbE(?%tVEpmG$R@M?VZf_aPZoU|YnlEF%w8gb7sxF%QZMN$oSAzp5%enhdtiMr- zomoV?)~cL46;pSj7a5-tKjxBPbQ_3nA<|#VO#dmRGc%phk4O41GSgQP8ZE}g z&T&XR)bd3(R*Tp?n$jVn2cMh$6KGJr_YIK6&ioA-cbZU3Q_SMvO?w$8)ZEPU_bHu( znw**bCrT%w#%88JhIAnmW{`Qb_i@D>J+7EL{yf{JsK-mxPwk06qPr}#T@~MfUD_PT ziFuxmVVTzj1D3XP6$lLotq6}Hq<gAs&ynvODDQ# zjKcQ6hnQVa@0SSsFziQG^j!+IrR>GHUs=&dmk(3XddcUUoOF(nogR!wZ~I( z97c}}D@VrupX}Sf53V0qlHuGG!=IdnVaERIbo_pVjh0Z63aW_^pwdPd3*!s>kbJ6dkc=&|&7D`1O zC&aYE^O3iruN@hDKdvzRhbg?J3;EO2@Q=){Xa%;i*I?_n4kLB$WJNtjg6-5k#^qbi zJZX4OwXo=_`Q_PCKCXkBr zM16i`G%@#JQqJp0&ZuKhRGh&N^+)m9L@MJ=<4Sk2F<^(pzf3lg**6-9Iq8cMrjv!F z=K)16m`p>LOx>mKb*R{wb!roMr8JCxO`8hiFrI+jQ09J<_~UB=HCR!DE4qR-IRU54 zmZ)PYpCi_5ihBEG2oRcVR!b)1l#QxpqZT#0)kAJ|zZS`5&DBtI(Qq|e`5v6_2g^{E z5+!@XO62DyN#+Dw0s};NL>M^H!!d2D3gTANed*IkRN>>-=Px#5~gPH>7I4E*PZ|7<#QBOuI zc9*DqF6<6nQFo8_pZI3SDgGTeazqc~jEC>=lk$@*RYeBQO0cvgejomXw%IMar>4#z z`VN9ehP|-yvK*=DkOyltm!^jX zR(`O7T?9X@=>p-nUE0dOCFnLl+W8gJH{8CA z%Zk1~gK$Vb{B!8C0M<|?if`;Y^N3sY%mW)fqS{|BNl8qgN!_dqHKCKRWME$^1h@^J%BEG7lF~Bp2=M}kIgZ}&I2SxIckU+k;odz;z&Z1z5vy~nfn1ol3Uy(hBw`Rr|B?+e*`GJBJr5>uwKH|?L-^ z73^(g@9FGaz}~aiyO6zSv-e!~zKXqz*?T^Fm$3Ij_Ey+?5qrDXyOh1l*n0_kFJ{whc6`wc;L1pShrwFI>jbQeJn z6Z9}a_Y(9of;JHJD}vS%)JaeyK|KUj6Z95A%L)3BAeyRb2MJn8&}i6R-&}&m6EvNm zsRT_WsEnZV3GxwiE?flTzQy z1WhJrH$k%rdYYheg2>ZXdkaBN5Og;|KP2c0g8q#lIu+{MM9^Oex`UuIF=6$E2)dl0 zn+YlQ1pSmC z8#YIMTL?n8NqzSvp^ZssT@vyW4|;x8RU_%%WcAu0%?h}m;4 ztN}`sR!~j^$PI{B5u%|try*u#s2lOgBCV#JtAJ3JC1o9e~u;()j>nb2D zhXy_^W%m%-&3NHRPPz@T=~le{#!KjbotJp@St+}K(qz^s0f2?{ZAzGk7xA$oHqpq# z`pqB8#SHW4u85RvC-6`7qK5$NRi*a{c@H5%06kSG1V@n+qrwrq;1T0bN{%=6Bmzi| z4=AAz0gHun9Km!LFM}L6PN`mD35L9Zki{FaObMPLv6O(1r`3ggfq7Ffc?Lt(P2rHV zMw(I|wji7#O<65fc^fT_O(9E@-`8lVsjK%{E}Rm?dO>VZpsrC0NK?Z2fA*9bsjjgZ zhE{EKFB>=b9S*&VWEDlHnYv)Q@Bn?HBPlZsBoTK974Z=5fcRJiZskL-Y@e zxLw4*6!C9F{Je;}MSMWSZ;JQ>5f6y?xQKI#IRCRmJVC@4ig=ocXNkC2#AJUY*CK&0 z74b?D*NQkG;@d@hmx$X${D_EuCgT4T@pcjK6!A+UJ}BZ|5q~J+zl->!i1TK1`OX&c 
zL=j&sVylQ}i?~F@r6OJ?;#DH97jd(QBO<;>#1D!1Mv9QB-VXu|KelR{+Iel6$GnCiUi$(fW!KYW)cSa>?h+pIv zU%~m8iMaQ2j#mo2L&VZ|1)Yf7MLZy4t5xKSxKhM^5$jdcAG?ztIz-$nVzZ5>TSZ(c z;x-Wv=;_!{Q2BNdcZj%G#BDP~zKAXh|BbJQBQ}V9=$zndjE@h1^GxcqJa?& zjA&p)10xz3(ZGlXMl>*@f&Z^GFz&+2iiMTR{H1okxqQJ&d!^a#&$|I(ITDuO#IM*x zKWXQ!y2Vpfdy8jv*c+&pt_!r8+IU8_FA(rcI#Sn|f^nim2>oUi`;9%%D*UXQFCaze z>I!>IZD)hYa&`o2vo5&S?e^So?GjgsLyF`PN1iid{ zRdT&n*CUw`GjIUhEngmPaMYZpJS)7^DURSHTXIE0>P$T{ zn3bNE>_qA&9P@J}=oF_txsUV+a@MWrHfQEZed;7WB6Mw}NI%PuL=4=@^+ssPKI3AQ z57QW)mfo1HMs}xnLqs|ZjW7So#uz9v8e?Xna@&ezn#QRtOA2Tvnp!IOk3BB&1;hA- z#x^N1z>+*^^Zl7;f#P*WWtn+Wl(tqT#-Mo3LneN&Ke+``Zr zoOst#ZaYKiM4hm$4D;g$EJHPfcW^u$sv>N=js*#eGc1Thc}ChzXoIhz%6}{6_7Lh1 zhZyN0LU0gXO*y@U1a(YfSSV1{fY}l46=*$|XOPh5U}H+6P1&hQa=O*oSQldcB%f2C zNrIS+z^EGo$TD&kFdwA(py5Ewu&{BQc5VX=PY7D4{z&0o{Zt5F%1e&#*a|&A9l7 zGP3p5UOkmhHlBL=l|kaB=^5mlVDj@;4H(iJLv^deO<~qkgPD1(Vfd-aK)}m-HIgH= z%31R4T3X1~`dH^~8kV5hR~2dskW6jE<%{y3RS=zNXv=U^@DV`|td{oS@`K@3NjbM> z5WtP&3;8fQ@WY~zp(8Uh%nWaPM&fGNhfW?Jl7pY14fz_mX?G9F^))v8>M@{LhYrm} zcf%nBF%AvM)9tW#NKPma_AyK0MYRlqH1!W;WCYV%F*qbguPu_B-lw`%ZOTYZ>p(rd zqR}txc}r$ehDEni9$WRX+LJt3*gde%`c6L1&f2fwUsXfx!aPqy)z-5B3*zAxGKbDlGPx?!HEz4Qv99e+?WA1$F;tyFa}61 ze2v(VPF*h9$gh|!_2AS$ORR-b@lt7TmXtMQTO)adcV2qRBxQZoCgpL{S8aoSY0A#& zaZ1@a1B>8@FQB>DQCC)0C2h+p)F*SO;OVR)%;MrUK(q8hmd%>EDTJ_&X3k%GTAlD;o``vT0kH z9og(M#Pi4+XogNAYmlG5+xT;KYOD1ZP_?ZARR>X(N`u)#vTZp@f$qs+0(DZ#@B%%R zqYLz2PEO9&yG&9}PMqG-A-prBdo_ni_ZH{|HZeN7Ns*P3D3OycYilm}-NWecW>3$> z1Lwta+D+|Q=5~`cZi<)&vf^3o_|IxK$Fqygc*^K0wg9qPtrn}b(qC-0c9zgWm@9)|;VwrPO43aTIS3JyK}0_V%;?C)PIP0^e&QFX zlr88c119Npx;)UapVm#I`AS|?!TF|$6t{3@h#0Q>*WgBFxbDw|!Et)jPL=!h{3#sY zB5>VL*C}w_?|WF_x}TVCkx>1*f7mZ@-9P+Wf$RRaw*{{IqdpP1?w2|#aNU1LmuQH; z?#H8Fl_6aBbG^Lq=e;a&-Ou->z;%D$Md(rDulxP%0xz?2{nQ9t z_XD;G+%NL~L*TkU@Q}beME+=43h~$dgEoPibw8lMtpdMS;JV+Cem$DXw~73Z1wJ5f zORmt5@EaBhT=y$37r5?UTqkhd&-inJ>;A?+>g9#M@H4%XGw_6z^wbpqG@k1Yb%{gC?vuKOd;%H!pA zzvNtjn}t8JO5nPmvQ6N+zw&p$&w>v0-}B!qaJu^f)0OA~GUd}97{WghINgOI+=NR_ 
z=Z}p@`hATv1+L%MxKQBwJr0||drEluGT<`wt>6EM3S7SjvWMXkyN5&dU4$D|G)8+a zp4!`;Q+Ixb8RpHQ^>RTE^?| zVz|V9r=8mSU%(M#l=9~d1AfGSe`&zSL6_;}Ee8B@1Ae6eUu3}PeqcJke>LEAF)%&< zAp`yk1Kw%C-!R~RH{j>sPe;=EPcz^P4fst4{AL5b-he-3!2iR5#|`)^2K-|KPJil> zF7MfAW#Ur|c!2?T81U;1cn!x<1%H}3j#Yy{_Zsjg4frz#e6In2)quZmz>gU4ESa@0 zS^wDv{2~KB*MMKkajY8rx!-`lX}~YR&_Uyq^z{aOy#fE30e_j{k!1OU2K;RU{=NbK zh~Xpxe?H@Ql3(7qO!>~|c#_^~z!wfIrFcq&;+VJUO4eYoON*_$LPZGXwsm0na^~={u=kvjN9)%Qn5#TUh$o<`zqm zgwGM_K7)s-f?~8v>7nS?H zfuPUr4HdYWyw!A8!*ki+9Td@F0fQVj-<(#8f9V#vnpT&wqirg-4*BFHYFy3Eqww!& ziBps`e!Pw)8Qy~{o#i2%dp0;2-D3WQT~O|A_Bl>R$PcDw@v|*wP3GwnygH6TkH_oB zF?_Y7VTj~27u5QyZgH#7RPJwVFv4Q zn&$V0YMqN&cJq>@9#(s11r3?vHk^(RGLu{uXZ&@C?aBrGz?jZY`H{1uj@3qFI7whf zTe_cHi<0=ls9reR8EhHBQ9p(xZ8YHt)Hn|=e zLyjf&I3pXbqM1aLcJ!P^nbEPc7Qi=xQ##&RnKc!V*8}6NxsH6EbP`wYDIj_=eli*H zL*@T2c@gpEjrhu?uH=~=GF=NQxus|67~-_bZoYglq*Dxz0B@nW<#D22sK=H+NP6D z=nfDjOI=xg8BSA&Ybhhv{UMGeb+9x3|1iX?X7Wim%Bq%dJ!IHMVL2)mk}V!)T44FS zyjT;&tin%_`C%~kyO(>C(}u&bipiqWirWk{=`h7$7P3uGcdU_Z4&{<+4;3hI2GiYz zq9@Y*=`$V7b&;IUdESt}aw|%nXr~`&!%T);9^j+tRBYx4uvS)igYa!MH*npag49+> zOQ^jDtD%Gaqlk4)jT!O}k6!%#FelLE z7W#)J>wVTCDYl_WvxX2ASjo7SUN4UF8Em{cl1ha6F;l>um{UlYZYe{2IbY?QNp;|m zqF?3sNfi}+|8@FnhA>Za8x6@zxpszRr8}60WM=rAkej?}!{F4#loXnpW>_EUcVtd& z*~UvUDNhP#db&Pcs$ZHR(zJUrDbyg2euw2W;^-uz%QD3=WTtz1Jf5L$MGx~hX5PtQ z7_BIV%NQ)3W+I6}hU*u}G`5{glCTRGERR-io}96c50k|2RwN6hq3M%8FPMIff|U>n z`ertrif?qq#fdaxp04j5hn5gK#i1FQcO-^z)GtR2$6RyiozJhWn+%UP&EFcB*Tr zpk!AADW-d-h+)7i!W~nbN$|UVbU0zS!hxh=Zl5C6a2qfqh2KzSN&Idy2kB-qhuFPj zesd7p6`FCyT|rL`za;3XVYdT4HR2K;0J;yzq97uE15m&EcbY4{r@jZoFADM-g5qi* Y-QA%(ffUnaz_eR{L#|Nc42_=u3xSvb)c^nh From 7d314fef78a8662508830f3a16baa9bdd0f228c3 Mon Sep 17 00:00:00 2001 From: nachocano Date: Wed, 3 Dec 2014 21:58:58 -0800 Subject: [PATCH 066/531] open for writing --- toolkit/toolkit_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolkit/toolkit_util.h b/toolkit/toolkit_util.h index e1ccc7003..cff7b7fe0 100644 --- a/toolkit/toolkit_util.h +++ 
b/toolkit/toolkit_util.h @@ -82,7 +82,7 @@ struct Matrix { if (!strcmp(fname, "stdout")) { fo = stdout; } else { - fo = utils::FopenCheck(fname, "r"); + fo = utils::FopenCheck(fname, "w"); } fprintf(fo, "%lu %lu\n", nrow, ncol); for (size_t i = 0; i < data.size(); ++i) { From 09a13056284b904502ad13a7d5bda9ba1e53e6c1 Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 22:27:52 -0800 Subject: [PATCH 067/531] chg readme --- README.md | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 937edd284..68c1ab8b6 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,33 @@ ## rabit: Robust Allreduce and Broadcast Interface -rabit is a light weight library designed to provide fault tolerant interface of Allreduce and Broadcast. +rabit is a light weight library designed to provide fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementation of distributed machine learning programs, many of which sits naturally under Allreduce abstraction. Contributors: https://github.com/tqchen/rabit/graphs/contributors +Design Note +==== +* Rabit is designed for algorithms that replicate same global model across nodes, while each node operating on local parition of data. +* The global statistics collection is done by using Allreduce +* Currently, Rabit is not good at problems where model is distributed across nodes, other abstractions might suits the purpose (for example [parameter server](https://github.com/mli/parameter_server)) + +Features +==== +* Portable library + - Rabit is a library instead of framework, program only need to link the library to run, without restricting to a single framework. 
+* Flexibility in programming + - Many frameworks only allow user to define limited functions(map,reduce), and the framework call these operation + - Programs call rabit functions, Allreduce, CheckPoint in any sequence + - Program persist over all the iterations, unless it fails and recover. +* Fault tolerance + - Rabit program can recover model and results of functions calls +* MPI compatible + - Codes using rabit interface naturally compiles with existing MPI compiler + - User can fall back to use MPI Allreduce if they like with no code modification + +Persistence of Program +==== +Many complicated Machine learning algorithm involves things like temporal memory allocation, result caching. It is good to have a persist program that runs over iterations and keeps the resources instead of re-allocate and re-compute the caching every time. Rabit allows the process to persist over all iterations. + Design Goal ==== * rabit should run fast From 0e9b64649a1cd573199aecab063fd2d4e07eb02f Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 22:30:23 -0800 Subject: [PATCH 068/531] ok --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 68c1ab8b6..7625c916f 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,10 @@ Features - Rabit is a library instead of framework, program only need to link the library to run, without restricting to a single framework. * Flexibility in programming - Many frameworks only allow user to define limited functions(map,reduce), and the framework call these operation - - Programs call rabit functions, Allreduce, CheckPoint in any sequence - - Program persist over all the iterations, unless it fails and recover. 
+ - When program with rabit, programs call rabit functions, Allreduce, CheckPoint in any sequence + - Program persist over all the iterations, unless it fails and recover * Fault tolerance - - Rabit program can recover model and results of functions calls + - Rabit program can recover model and results of syncrhonization functions calls(Allreduce and Broadcast) * MPI compatible - Codes using rabit interface naturally compiles with existing MPI compiler - User can fall back to use MPI Allreduce if they like with no code modification From 656a8fa3a22c4c541d60e5e5c28be14f11832ebd Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 22:32:30 -0800 Subject: [PATCH 069/531] ok --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7625c916f..b5766060c 100644 --- a/README.md +++ b/README.md @@ -10,16 +10,21 @@ Design Note * The global statistics collection is done by using Allreduce * Currently, Rabit is not good at problems where model is distributed across nodes, other abstractions might suits the purpose (for example [parameter server](https://github.com/mli/parameter_server)) +Design Goal +==== +* rabit should run fast +* rabit is light weight +* rabit dig safe burrows to avoid disasters + Features ==== * Portable library - Rabit is a library instead of framework, program only need to link the library to run, without restricting to a single framework. * Flexibility in programming - - Many frameworks only allow user to define limited functions(map,reduce), and the framework call these operation - - When program with rabit, programs call rabit functions, Allreduce, CheckPoint in any sequence + - Programs call rabit functions, Allreduce, CheckPoint in any sequence, as opposed to defines limited functions and being called. 
- Program persist over all the iterations, unless it fails and recover * Fault tolerance - - Rabit program can recover model and results of syncrhonization functions calls(Allreduce and Broadcast) + - Rabit program can recover model and results of syncrhonization functions calls * MPI compatible - Codes using rabit interface naturally compiles with existing MPI compiler - User can fall back to use MPI Allreduce if they like with no code modification @@ -28,9 +33,4 @@ Persistence of Program ==== Many complicated Machine learning algorithm involves things like temporal memory allocation, result caching. It is good to have a persist program that runs over iterations and keeps the resources instead of re-allocate and re-compute the caching every time. Rabit allows the process to persist over all iterations. -Design Goal -==== -* rabit should run fast -* rabit is light weight -* rabit dig safe burrows to avoid disasters From 3033177e9e5e58b50eec4b43aa3302bf87247d9f Mon Sep 17 00:00:00 2001 From: tqchen Date: Wed, 3 Dec 2014 22:36:16 -0800 Subject: [PATCH 070/531] ok --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b5766060c..0aa5cf527 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ## rabit: Robust Allreduce and Broadcast Interface -rabit is a light weight library designed to provide fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementation of distributed machine learning programs, many of which sits naturally under Allreduce abstraction. +rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementation of distributed machine learning programs, many of which sits naturally under Allreduce abstraction. 
Contributors: https://github.com/tqchen/rabit/graphs/contributors @@ -21,7 +21,7 @@ Features * Portable library - Rabit is a library instead of framework, program only need to link the library to run, without restricting to a single framework. * Flexibility in programming - - Programs call rabit functions, Allreduce, CheckPoint in any sequence, as opposed to defines limited functions and being called. + - Programs call rabit functions in any sequence, as opposed to defines limited functions and being called. - Program persist over all the iterations, unless it fails and recover * Fault tolerance - Rabit program can recover model and results of syncrhonization functions calls @@ -31,6 +31,5 @@ Features Persistence of Program ==== -Many complicated Machine learning algorithm involves things like temporal memory allocation, result caching. It is good to have a persist program that runs over iterations and keeps the resources instead of re-allocate and re-compute the caching every time. Rabit allows the process to persist over all iterations. - - +Many complicated Machine learning algorithm involves things like temporal memory allocation, result caching. +It is good to have a program that persist over iterations and keeps the resources instead of re-allocate and re-compute the caching every time. Rabit allows the process to persist over all iterations. 
From 67229fd7a9e97a4c032921592f91bfc0d8730cd0 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 4 Dec 2014 09:05:48 -0800 Subject: [PATCH 071/531] change model --- toolkit/kmeans.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index 24f784923..109b49826 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -73,13 +73,13 @@ inline size_t GetCluster(const Matrix ¢roids, double dmin = Cos(centroids[0], v); for (size_t k = 1; k < centroids.nrow; ++k) { double dist = Cos(centroids[k], v); - if (dist < dmin) { + if (dist > dmin) { dmin = dist; imin = k; - } + } } return imin; } - + int main(int argc, char *argv[]) { if (argc < 5) { printf("Usage: num_cluster max_iter \n"); @@ -116,9 +116,11 @@ int main(int argc, char *argv[]) { for (size_t i = 0; i < ndata; ++i) { SparseMat::Vector v = data[i]; size_t k = GetCluster(model.centroids, v); + // temp[k] += v for (size_t j = 0; j < v.length; ++j) { temp[k][v[j].findex] += v[j].fvalue; } + // use last column to record counts temp[k][num_feat] += 1.0f; } // call allreduce @@ -126,6 +128,7 @@ int main(int argc, char *argv[]) { // set number for (int k = 0; k < num_cluster; ++k) { float cnt = temp[k][num_feat]; + utils::Check(cnt != 0.0f, "get zero sized cluster"); for (unsigned i = 0; i < num_feat; ++i) { model.centroids[k][i] = temp[k][i] / cnt; } From 65a1cdf8e5c44e22dd5bd438e8257d45bfbe7cc8 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 4 Dec 2014 09:07:36 -0800 Subject: [PATCH 072/531] remove doc from main repo --- doc/.gitignore | 8 ---- doc/fig/allreduce.pdf | Bin 44692 -> 0 bytes doc/rabit.bib | 69 ------------------------------- doc/rabit.tex | 94 ------------------------------------------ 4 files changed, 171 deletions(-) delete mode 100644 doc/.gitignore delete mode 100644 doc/fig/allreduce.pdf delete mode 100644 doc/rabit.bib delete mode 100644 doc/rabit.tex diff --git a/doc/.gitignore b/doc/.gitignore deleted file mode 100644 index 
8c9da0f78..000000000 --- a/doc/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -rabit.pdf -*.bbl -*.blg -*.fls -*.aux -*.gz -*.log -Output diff --git a/doc/fig/allreduce.pdf b/doc/fig/allreduce.pdf deleted file mode 100644 index 17d846d2a6cb47ec4b1d6eb569d92ce0dc1d7817..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 44692 zcmb@tby(D0*FOq_BQk&(G)RbacZbpf(lNBuARyg62qK-*FeoYAC=E)NibJQ;Db0|- zFR0IbKfdSvo$FlJ`Nxr&9V4&HVhS1HwbaL1kxR z1rZUUVplY`g*jVN@d96|QL)QBvvD?e1U_w^I-5(Go7$O~Q;CT|oSYrapFW4Uqbo%< z-fW}74>}ZUPrk?SS#2D8_lE^NOtOIY4O8aUR*;I-X3Lr2Qg7?iK#V-KI>{l(5f+h% zD&>n7(ah-~Ymiid@%M)v7|N^W`xua7MCr3ZF6`Bz`}Xdtv)h=b zPNEhNb6d0D!~*}j>l zI~PY&b0;dkUtdVt**XKiJ5gPV50I6Tx!JR)kL=v3^f-Voc=`FLxOq4YAb_}j?{hit zkC;^)?M&6povHMI&7@?g*yW!CTR!^zDf#376BiZd<>ml2vcNb_&ff#6*maamtjtZFuZGD#=cE$&^UFWv zU4He4yx&$J_-n0SLiw`NFo<^JfIvlH;E zq}_9SJ6m8ZMBxA3EH@ghYahtEuon2yoWnoC>B#r@%U!NHxkI@72ro!Mn>T(jAz zt#fmFdi1l5)rez+*x~&nyTEIl;eZ38>BBu)_eVV+7Kp&3Meq@3){4c+cegpD59zx7x7#!3w4oA6{2TDZTJ=hzKzOL}( z6lZFXTJ>r!8bOyd`UCpP7RrmGPN-(YK{)EKRbt`Gbf^#vH4T~qx^KsL;b>#K+|J^` zQs8_v0$?*LBJlaoU~xI7`(9|QZTXt{xDN$S^}Poie$fW3uYzvIasjogxq(B4A7*e{ z6D@K!G!tDQfH~m$?2VEer2aJT?^~ipM!z``d~nIm<+2|bs1W0<{HRR3ZVg@Ezt)4# zT$~@uXO5F7@P z`WNGJTB1>R>PldgRa0NbrJC2lwnVGZa$pl%68NQke~4MWzjweI9b){ZoN7dViprWe z2gq#jFV>DwgMMu9UTvb=TuPp?D+v)cD0uUavPqbzNV?i^KlD7xz2ms>mt~*IHp(;I z_v58jojs@-)|!*B#xIgC*0f=rQ?22H`u;`AJl>=(VEX@MNk1zQXNqE<%}XW9rwSWO z5FB9(e<{VX_FuO2;PCS&3@`MXh0=$#yH4AJoe%$5z9iVD&SYEJ zcjvAiKDjOnbrCMB>C#{1%YA>7O2;~+wm$-0BvP3DYva?SPVodAsRKxXhDBWv;X75+ z_7c1_$HL|};Eoi-^e)@1AAhfBxSd6L*0q_FpUTTb=#^D|>n{tE9N^f?Z4HpJdS%S=n6U z@$VQmzJ`mY$yoKbCf~K;`ToS{Cr7inj~+gL%ir{&+>7?F{;FgkdzKr|%5+FMtR&GQ z!PKn(?K~3V@6GY3DA!u5Ju1}eiz^hreZQqcUM34C=fhSI`r8p65L{^e{|P8teitYI z-vMRjWcu)5fZ{SI?zTfIe)xfX=A=qA>-oh#UW?Ws+duD2a#)UYw1#g1jC8oV*b z>(h#+xAT{w%@66Wk&!u#uGn%>ZED*sK)1`I8W&S!ZpF~-Bswggxz_3_kE=Hg1XTGv6_=K?WNsAlkZc_SqBO^mtQ{yt81(d#nCU!Ix1u;pMA30wcyVpO2B%&h>Lzx~ApTeArv2R{uU%xVM6AMq_zErv{=Zp 
z?1y!ixF|35<2T|nT+MtoPYGvod1liZy$aRVzOu?c`?RbpjB1`{eH{yimr5j4H-C8> z%)&eU;9l30KXE@3^cr;BO|fa@xkAee3M98i$88yYNnBOWS6IlC=(p$E`|z=os}P*p%qKboVKS!*6*Mp0gly>5bJS?!}G+<&X>_p%HO9XMz7nZXYAjv z9qSmJtcJdGSy@beMp>mb=&}`I7uVApMG;_&=Dda%S#hRn8hg_$?@uUEfj@>58F{OU zmAc>=hhbw0JZ`Nx73MFEx7GJOD|UQeLv;e%EK*PDb>U+{?k^AVpf1cG<^H+1oJN#)jB+gPA(}PenoOer3l(-(IJ>@qOb>PFRiN z;Rr(%-5uln&kD1``*esOJK96&U|T!u=L9#aWPqTq+dga(^)-I6BFY;VR+>cAv_IX$ zPvo}=$3FN;Dt^vSr0tUG)RcPPAoDv;_(LANYr$r3$b`bz!IwEp&q|_S zCoVFAUjEsh)X-4+#kz3er-fKNZ+Ye5^R%V~n(AonX@{CsRJ>7nOF8j1FoSMnJ5}K6 z-`d-074`EvM=dSg)!(Q%o#3Bln0miXQ~Tca$&BL*&t)1T&!1(hUrHOQYYb1P*~v+l z-kP6q2bZ4BW&F-Z&tgPg^ib zYu9*;QDJa4@Xmj$;iNBy;$W0gu-;YTkT}zSWek=0&uUN5QQ1xQa{&_yw+XSWoCZ(p zqqIj1rcudW)edPTn@DxU#gU&=tifpq-sFKh@~O>>{h9qh>HkQc>K^z3Wec5rfbmqJ z>dt=F-Br)TI)v-kpj=OSVY zU~APwD>p*z1*?TyuV%T59AKE7Mdh{Rd34rIFPA5M&xw1=76{(hwl0^jlOm}8cL;pke3Im6{(NSIy zXvIB3^v!=fBMr18f3DxSB3oy&{+!hkEuS^;e-J|h%J4aW>fw~ryf-;Gb#Ca$_y=sC zpvx=paH|?0rp)u`wiR^9NMKmn;L3bPQ+fU9kNp#9+rci#dmF1=hj!_avtMvFVV=cAL$AinkxUt-_Y|~lDT2?m3oJ0HSeSSv+&LO z+`c|@u--C;T^M^^e5zSLd}6bC&)d?jG)A80Ka#$sjNUClXxpVp$n`p!x%mrrY>S*& z<&Sgu4D~!Az4}hFsJH)!d|4X~>&wLoA-?v$kq{Y4^Z3W0nyseI;pi~F z%E-6r|54GhD*8PAHEe<;+7nG}16sHAwjvAbs6x!WVY+*qx>Mz ze4caimD`MUm_}lg$3+x(Ruu@Msos7b<(b4y=enw%h&9{tS%&Z$SP-6YTa4i*y z*a#@u9WvVYChqZo?ysK)aoO1+qPJ`(*3GT8S&q%~F?`>6z43qW#~%Dajy|(>Q&Yvo zrozIV^JM%xqXpivJ!`Y`l?3G2Ptt{GyI-yojDC`29k`iQX-zYlm7aE_RSO+mVJBm!aW<9;`glcPGmAkLA7w?PJfJ-YsyilZ;K1Nq(>}JFEtmLDwVYcZ*b>?yA*p zG<(0Gn*B$d3@Y%5sWDXFI#JU~9x6^*j8Wn+f=~~f%Uefiqsezl@WKiP!z?Y0DY7tq z5rtct&w!VAysq$EwC&Tg00@=v~NE!5@C7+nuyzU;4kl+vqZ))`@JFbsu`9G?;fL z$i=2vUEUm3>aK0{G9}%f4aw5|N7h^_5<7&G-z2!Q5^Z8;vf6nlyzW!H zOZ>)9ZS$_;Uu&vFA;e%)j?e82*9FZ*obel74- zGt+meZ!WHd&FkHkIm-ad%snNi>->vnrF6T=u&}nbFO}S~ zrQib zwZ>vctqjexEXi1WsU`WhKR`mMuo)tn<1tU}&y>B$%X`wbRd9%8nfMbQIQ&6+^+@gH zJ-=ngnWv_9?9=j;-aj9jF$PrUdsPdE6*^WszN(!66txtDVm0#G!^EFww%bJ&<@^?T znJc)a#x||#fl);Y28z;iosAmfHiz|)6u7$NhVbv2%HM^SOyUWm-3k5>+jza`-N6Cf 
zKk{cK$lv7Y#+Kj19(T-p=9hU`?31cz4c4vWqIhm9X)E=lX`k-hR)LxRvxE~?ee=cnNlw0(H6o~Y z4sSLxAuqof0%ISMYJNCYVfq`DB;b%P$>zoKQ}D2B^~ zqTVw7m=+KFrDeDY!K>r?>+JJOUrX9MSb||@J<$j^W86(vQYQahB;)Ookz!0^i ztY1@=4dNqR$X!}IKaa(Ye)4M^EmPU_3@?MKL*$2aP;6h9xQA%9*|?+RV^H;!gV}HD zLLl#FcxU~B6>tIqJ6UE{hK(lk@1&mHC96L*&kt;DFS!~wob7*F(mEGKqirYhgS*9g z=bCc89GuK`UKiJpw~uIq%&8OJWpiv0&8<#nVOSG z(l4a!C~a9hpC`hgk7BT~i}F(>f5-?)8o?%CU*D$FVDw!QxH9S|mjYIEzDD>?;uvCM zGYfk&Up{wtCtz*wq>tiP#wTF}{6PLu+6x8g9%e(xEGy{NR8oKo!XozXF?h_iEZ+R# zA!;}ay;@I_Nu_y4vWc7PjE==`#8__Mqa+EUAuhq^W1(|jzhwH82-(DMdvsx<5|5`D zBvWj{NL|???OM_;Crba+yp!BCa6@5sa^LIaT6m*p2rm1`gV{T-@Jp^f7lLR{OW^wc z1=B5BOEzHN3ZvS3J--x0bferC>!a`@PSnBBT)p+d&G~v&8ZxwcgSf_TeK^QpIN zk)ok| ztKn{2pBCBJN9LQ49RdMun`KE*H(RTmVUiPN9z3!6=~XXF<6b$iy~j71u&jPC#CwM# z1=(d-@`IqH7@WYcyMo;1ttynPo|5ix^f zaOm-ODmNMVe zJzwkf01?9C)zl6rrqty>zHnI5;EPWS?7G_E2(SLenvyh~XN(Z}-~Edogd-B+t7iP?>kof1$XP zrg4_%OxIi#Pq!8MzO%JWgXdiMc@?^mz};5d9Y|C9O1-tzQX7K)-OR8+w^e~+ls1Sa zkUaG&jgYu>y`7nEeMC^%7BVJGHOO!LKAH)d$5xtQ$-5F&v*Xtn2X)C|h%vE*VMAQ5 zEC1re|2)In;x*-VuzPBN5fjOt7IIsBccN%(Q;AJQLwO|rY@gDD9?kZVCJ~mK>tbQ& z?~rF%>VIZ{;StaILzx`l%|~NzAn*t|p}W(FHmX*;N}#--#j6?FS*up75pgAw78~&T z84)UeY`p)VoTSO#!WAD!o5twki>z5;u>8Ton)q0U9+}d@akyBIrga;?P|cTc@+(=s z00jX)8m_y2}TP<3@|7;+&=ly_wf>cGDZX+6^N_i6Y#97=LJF*6sKm zrPHCRTSaF0Xa)p#+LBjuI;D*W<>+-6z_N6Of()mQ5rGSH?S+jPC|eDo43l3+x^AXR zuX=Uv=#YCn_wv}vf1SH2FTtjabv>zp67A;rH0xP#L#>;G_9oR2etMR0$ek-^#3m5f zslVg7B)qX`C}`#wHL=|KUTzKuB$D8DQC%=OL7ylimwP1LsNz@OVe3wbbc zEEnP(sbD3PJAF8%E#EF3xa#$E->Z1RkJbkP-?+l1j_gZyqS}hQ`^^*^jVem{*U1gb z6P0B=QpbSMlo1HpiR@e%%LsSJLKA3ehxjmGz;>niTyV3?VhgZB9kr{d zjMT5KuQwyhdee(^2ke7qTDKxguRe})Cn1dQ`vPs$K&YYkLs5^osJ>;~CMyBvLq`q( z&Ul#ZKMq73)djNnc!-nKS333GtoeD-%a8e~t9VB>a^_KN-eZVc`RQ? 
zu(xZfcT*i5Fs>AWXe5OZaf~p#NAruENakBk)rNPcSIeZ#8d*wgaiqsiwS1PZX_yy-{`d4ec5G^4@GywNFc5}Yvc3$)2)x; z2Ad7>XO0G_HsOrSgwFXQl}`Y@D)4P4stfavYV1Tg+~)=u+cBpIjf+}C1FuAO)J{mi z_I2C5jNS!-#$7d01N8&`y$z!2!j%>TY8hWunI_8GH0{=YJZT_dGZt9j@Ty@MygJZ!Sd1-|HJb-c0nIPmAsp3{%?+)9{ znqpa;sB3qlJ>3s>`+Wx8X(i2V@njIvCA9%l*w5Kf;ij^R_6P^y^}EokxF;tCU#(A* z^&6333E5+n;Xi6iCN5a~lKZJ+PS#_yR-dS@s1BV~o4t^%0y&XSxFdgcGgzdJ?xEDV z_YefFb_DqS^5OAx+2av$CmK67gVJ*muKyLrE|={;Fza@2G%>)C*Uk9f)P3d%tssu6&}%m+Yh zFu-sqIu$$GBf&yl&>0D+s}+^<4X@Tl=O$Xuf9rjGAqk%=oq?z9}e7@m6u|zd5&Zg_^HH(q{l9jWpTI zR0uTLzZ}Y}1nuuyXqvFVACNp*a&@Qrh37^25~={%x|&hwyFuK{*KzU03nW}V$dr6t z_Q`L&+i=@SgCgMyLnIk85nKCeX%_(gWFof&6vaJjj=pe9y&`bF>CyOeUM${Qv}~PS zl3?ED=4-$T;_*idN)nbXo_t%)xxa4*Sxe=ar+=yL2s()k0qv-(aHZ}!Mu-^D*yZ$^ z8IxcCMb3jv2^U(cz#LpAw7~8K&_2 zkP#aRa2FlXB8q*bZ~@_s81O&VdSUvzCZP$OI0mSaWAxA_yc&k8LeoTQ!azv9+pDnJ zdB=!`uzH2NA}@a>k3Cuv6a#7A@!y*Hx-MuE@7-J@Z`3al^nPW%C1Gg(VPN{_YqjXb?F z`4a39jqZazFz8Os2-n!Av=-l0^|9*-dXqh!aH53(Rd?M&I@U3P9qREnLc~_j=k8w@ z*O~PoGO|uQgyyJtE75>+Vyx`Vw=Eqd-q?02qwBl9Dlc9Q!~s-EfWK;G)(w3k8pz(D zIRKJttf_k*F)lvsG6xO%L-}_iuB7$|Utp(=PAO@CbqK z4{4AU)^-!-w|Ejn_wu^4W={s2Xq1lJ6S*qqQPqGBg19Tr0sATk!%ifbr~o}N9(E7$ z?i9?4PRrd>^UVmml58|+zcM}!X;{){v*B1xDd&-4VGQPTjr`%*KLF&1I4t7IvX@!g ztHKssVg7MhhSG_t2rsdkC*a`%{|7&T%GWyO&zx)4``4APjw;Kt=qEsA{f@)lewf9q zM*;^q8X+I>(U*K!BprG7L3M%UcZs)UoM3dkPSqE^(^c|3i^(m~9&9-5D#r0_pPKC< zys)Iss@{{h$&(SGi0<}5hGi$;8%xMO91tl^W{V$k0&#H)Llt^e>v-vC()l$BuO5D+ zv{Cfiv4AZleYn&~A5m0wGPD07yXJ@fyeM@V|%aUl#=eGYfLXjNRo}a)~ zl~#})0%E15jeyK&w;;~<=&^jq)^c@-uKia(2-G*RnpAvH!@vO1Bu{WbmhM8ep^?c0 zl!ffR>6N>8Tm8GhCtQ>WfIS7jqsL^E6?B!m7$ z@HNRU4BYqhCe2oV;e}?{!9Y3Gd;Fb5H$)$}&ErEPD<-=VS)DrGjpsJ>3nIpT15b%fj5K?|<6@$mXBZtR^e(R4XEPM5Y%nUqdzT+%zK>0lzO^-1TUyZ>98%hhzyiwFpQzJFgp zLaJg~f@01>r`sId$hx5UmF^i%PBy2W7jlIE)BOvA1wG3&6Tl8NC2~6~_EV{La5sN& zK?`K9>DWu4KZ7<86jXfI22aa_f0&o4!a)Q_XIp+z$^S09ZM8`zn2)8!TwbyWF_)D) zC{j>lI0+D@fkr5c%3arQNuh_fCDYIp=1Y`7mU*uFW;)P1rNJFeM>25`A}SCX znX+KuYI;Wg)D$mMhL!EXY<>euv}Yg}aJc16v;@+6iPogUW16fXbdyr!;p54Nh7^y$ 
z9VHUH@~a!Q&1c1@$i`}cEUi`@e82T3i;H(HqXY~dXRQT=_l-Be{sKMZ^`&EeOrIa3 zC*K_9FfVE+C794{9cIu8b4B1bzi7BDn8GOtT1*07%O;XmYZnggU6@nN-qOX?l?qgPe#UDGO~;gSS36)q-wF?e#7i zwtf5O?>9;W4A03)puqXB1q~8dC~N6}9m$;Vk+S8U2$(=l2|anX`Fy{0!!WcCnq^YX zPl`R4wcMd_|E6u;i5+xt2W5Wm(mGw1g)6&$i| z%zbS%RNh|KFlL2xNgW|eeS^C1>xAt-Og^m^cy*aqqe1K&t(wIoc|c4mk)Y<&1$Ohu zE<^tIl)=8*zEWJhE`OsZ8ur;5R7zkok~vR;l9D&lfscn?tP>EtD)6@;ojWILJ_;g5 ze-v5mf8E$x5{FcxtDNvNn$7c94>5js_GpqLZ`-5+LJfihR;sZk)u2@-E&TlLL+DU+ z=eXoee|+St0<7wD3>t}p)6<5*+p2H}Mt|BHy*DKe)}kUC&)ho~pa%(%y|*oce&rTCTCqWJA;`tKd|L4jmDDYaaJUI4J)3TkmBTwtO^y<0d zT<#0YfWI@|dIG6Fou?Of(0tQ=r)~T*m#eP`dzm@F#yErV$gQxCNV=-{4!bORK@BQy zu-W%Dx?LYgDl5<|=NMku#~vAjNL~{pXW|e7m&trsFb`kD>Ui4{Vd-1m?=HKf@0tWD zDrt%GJ2g$dSD|eTgIcOul%FDz7T7=_DthbB_84iU$*!+oS_HE7F_eZ;D1c=O{EKkJ zkH18%|I3=AzO}FO5p>DGV@D`i+E_;>Y|;kcD)0ly^7<^$Qn#T=3*0=wi6K?VdGq-o z&OKPF29*Q&!O=zIv@;>Yf4Yl8>SL#}Kx54QyuoX)i|Vv+S$fgNFflOYz>mPu5jT$J z@cb;ps9mpV_hnEP$YKNjA<1M+l>BuVVcsd6ZRF0z1Y9X_DLLmdlzBl#ZN7YMW6}%D zlE9Ea5~D)ihfigK+LV3KY&AUiu|A49m6pGqC#_Eg^8O|V(BCXCYkbUzE6CYbn;t>30eZwXOCa-4oBV#V31H1Fx_K6 zQ3PvgNaSH1KF8NlwW0u0o|e$xT4R3P|M49#>1g_}iGDju)StBa^7hwrUfgeWe#&j; zWIjl{tumN!1YLVQOkC!oVo-yM9?VhgqQ*~nlCt&HUHzmqL&#&M@o?H}#s^8!LRfCK zG8c0~)6dU7D~L)reFKo64|`sHvgy_w)<}x{95tbsYmV1M6(RX9&{>}QA7 zb$ebmw{o4Cdu>#Y;2X1FUBB)}hx%0A)~k9CzBr|y5s5ke;g_$`$+H?%c^f?M7v`>1- zrCBsZ@cgw2(28(yiYc~B5x()mEu$4)^6(#(+e^QBqkZkt&!o}&hHt&cS_|Dq!CZOr zJO*I{NP4Gx_U%V!bDq@+Hx}eaidqUwo0#Gh4j}7N>S2{1m)FN{99Y7hotq;33~0M^ z?EH3fb~p?P_rcIM2Xnv{+GHl#o`SwuO?&BjPoc*zH^ zfk&`8<4Zb`3@p)91*v@zhDQidc?;r-QX7BblmFO^&2w;R>td09hHci=O(gEg;p1?; z$Ye-%wuDe2e4{%I&qr7Kc%C0od$9jK{1rEpmEPwb{_Y*S1uhZ3M<)FWltzoA} zlvn2Qkiv;j)xnv=DO|s8NCi0kNTR35+Vb9jIoIxY_Pb5I63#021Uk6(ApJQb(_Do? 
zS*P8Pts|T!wz?41)cqP%Ft8aDuPuA?sbiK;f`iL>&`Bink&(mh5K_LW>5PuicRjKW zXu>M26)#+}vK#UpaA+x4g>y1+SQFMJHy?~rpp<7EpBGO~Y@H0uk$Bo%l&QWLuM)rR zR}rO=2?IM@0bw(l2jsl^D{$(xTnu!(jnH^qT%6<|%xxCSKd#I;|AtZKLpWKvXA=n^ z8!f?Sqy#;k{rmAiLQ$!agfhOWszveb@?%3({l#W0&#`*(^f-z(hc+EDpXq0!(L{0nZ;>x1bzpgiP9tj?$nr4E!nair( zY)nnF%2O*Xyu(qpgS>@jL$!WYn@jb|CJP04sqMATb;)1MFi=Hk0C#L=Oe=J|z37Yu zw>9rtongxO51i854l~!<)vry?k?`bvIIrO@thLi?!7{$4GkVcL1JB9=F*1}jx|LP> znZPbGISZlY=d0x#OJ3OLACxslV%a^1&iOyHw6yJKOP@S`cVJ0~CS%=-Zlc(lx{g6W ztaH_bWdZ-wt*evbw_3PgVE^Ufbccq$aqlz;n<&HY97SZ*U_Y^+7=}OMx9`Ata~v@i zfU{1T5eei%EsF!&z#+Z7(d)CoK~Qh+qqc&z*9MdsN1b7XhKA?tc?FvItWJ$qd-ozV zjM2bm2dtCM1#H>yGW?5Vy;38AK85ooaRZ69A=-9g!j_z4n&)Xdc32xXB$qHDHJv0B}D1>S~1?WxDy;bY3$aIZWf}!f8SVK ztts%C6le~zCeRBLUHxEpx?!HE@48^5zfz;XR&s&i{H5K`VZl7HdF|7X%i-tv=eQlC zXOyLhV@^!b??KOZNJs({4_X&b-tNzT+fUSZ+jN*4T$YJG`efKZOyA(SrpCEO0Iq2#HL`;LN(jMKJKl#-UzUh6^sfy%~R*VBF1$Tc_D0?dS(!z5(i?GVi@0Ndx% z8b!vq^7x8Y!+?G=&BwbjdKu1n%Y{B8ETK;0lsPZPo-sw&fHrnXNH7;ySU6zkd=y`k z{f;Bam<-s@Te2cHoV4`xucrcqs~8<&F|e~t7e%Tf^`ComxAP4$ zJA7Vo@7}v}W0JjSXEEyGgZ1m21af-t96&S>LUKivJ?{-xsvq_XmgQs@YnXnj@+Tcr zL;C?C>Vd^UgwbB>QTJkMnm_OMSrI;#YFoySo*iK)8EYK4QSF~8gKxTPH}Os_CB6R1 zhuyar0~Gb*U(c7*dT*-3;|X;55HaP($9Nnw4r~Tid*K|@UZ?3_5xso2wW8>lJlHnE z2MpF*F*6qmzShE4y{SU;*HjgsMP%dNyc<`7|E~i|-KQCz_Sk9l0S8CWG6q|4T+7;SkAO>k8P$E?JBdz6k+A62oUf zM9dHKSQ9amHX+~*#B$fu7h^vYAp#b>3fBPJ^}LJt()W=g~_zH zH#8&&bZ8MVguvOg>_)A=T_Uca1#d5tjBmj2Jk4p6zFYAuLe7HMNyBuqJ3km=12kDoFr4PfwOQWmn>>k6u~Bi5-;MY%aD*{?GtqEO*ovDTvfeM$n9Xq&T16_(= zMEx%hwG3dA-3j4WP$jS8u09>O?=H!0tGS(u=$8EgAAs>^r3A^$OrI6RXndjxjOSkJ zn^UV}eb`BK1h~zazb_~b_8f#sz5Um5hW&PXQ7#e5SeG~WxCb`RGMJ>47yhqSIJd_- zy+w520uvtLzls@r<*ehq9{Op&rvN?VIF!eN*N3s&m6Twa6A^Q8`vPc+vvltCrm0Lp z9hdPbfbTDGzeTpmKL+71b5D!g-HbsGAAwEEJh$o!3|#O7kV?-3- za;WgMCvl;98J<1nL?#Y0dR4@%0$HrOB8KNITUAxuh0hrApP6bH`+LinwXU}A_plb! 
zzfXJ7W# zjw{W|2|;iY2D_rmyUfZmMqSi6URrE$xDwlayYW=(uI98pSycMq@*V29AU!hhF@*b< zPKQfcqEX>EHT@#AvIMX0k@Z=bFsDIJFy^^+?q_)K$R~sAMpkB7Y9NZ%Ebtjqf57Kd z75S>!z}JBeucU}V_4oJ3au|X4CeR(sR}c$cj8|!$j4!)kQq0M{5fgWnMZQ=f%z>G9 zmcUGHCw#!Sw!yL8Qe;7#F6)!fxdlSWK}%9LqZ^A{f($`ZU~URsQhhvg{@#55-ti}yylCCNosaukv%u5xBr zc6rO}wH9d`A65vBzBSfCW1;40194dShTEHH-Idg=A<=}pEO2u%7)b2sD6g9^2$18m zpdCbiz7ZCX&pAs9)B17ksVV`n03s%?humYmywf=sZ8QYq|N4SNEuIU|K1*uEn-)?a z#zpphwl^7nhE4v}7`+qp?PbeXe@Rl7SsnlJTRNmy)k{}mCoW{+xxMkD03SR=w z3XAX?)0?$7faZu+N>HD=KkY7}%c!yWAPXK&9qf`=S9?LwP3jU+aaBI)IHfq7u3)s_ zWesA2*8ma;Y!COXnzm6bPXdpMu3Jfc31C5~{)mPuX?TnstwM@K5i!)u_i4PKPn~1R zdn3oEC<4D86?!#DV)t1Iy-Jv&Uh9ELb$dRwtR4z_OHfU>&>3~l{n8lv4uqMbABoYV zvwNH+iyzoGZmh2n;F)PvzV$a1_l^J@^0130$~gCi%v2l_zJUlx1RM`UB8u_ zr=taz+96rt`|lbbbhuY^CmDc=#rG+?c49QTD?G`Sen)e`OW$5z7&@TAn`O1tO07o%_N*~6mm$>go4%`?sjZ40-a|n5a-DRB6 zLdlYlgv6oaRpv2*X9IgP5t6e!oLj}$mFSq$FI_xoOw_gDZR`?AkWuTpw9+4VqI=O3 zYmiJ=jd!=Dy)>v#sU^dp{n1vt`Vo-qgh4ra1w%iM1E89grHskPRY;=q( z%ig-8JAd`oQPPR5Lz9p?yst3T(NKK{o2|z5&z;@^HD!{po3e+ErM1uZ$X|ZEbd?du z1b{D9G^c_T18zPg4V9J9GK~N6hVfT%H`E%77}z_eaXyYNbYqBqOEnxEl1+ z0l^zzXqVD`79G9Lu1XG>rVDiZDwU3ZQ`*QU$f^tvXfElB+2tyPW*sf032l4JkjD&p z-tiJu^3Ux1y5~Dw$>Q_1Q|JTM2u~4p^D(I z${tx`gDVhes_TvSZ$1D#;WNj>YiOpMedXWwXu1b4-w*x_$J)`33M9}ozO+ga?*vd* z7c6_Xf~8CMO&Es#d(XgxYa?-^XNZNh>WO4z(G}spEb#VDpPnvn|t2f9fWC- zkVY9Kg8Hn6bG#Fl?x~8!<(#+Q42AYD>koQcIJ1!{1SccI+tou3<4}srFxe+8msU6eu7QCj1v!o}$DiPmzziFXc z@M=X|bLc_!0bN6^h!+(pwE23+2FJ~Od{UxcIMPeF3gtVAXjZ!!c1YN?3T3}_bVTTR zb(R1fw8+%Zae(CfoszHaDX4I^o5Ei7&Wv$`t(qM=JLSSBm^*e}TU6cL3jDR<_(}X^}_6|`M9osakE@gH`0Nn zGKILn)sp+2x6JXy<-MPrqwOPIu@fPj=wJ!>@pPQgeXHsb7M4oVqgPZKRQTdvGk2SrK30{A~DlIvUohOpM7J4>gJj80Wc# z0Hb6U;vf0A{idcd^?M#MQDHuu==_jx%-f1?Jkz(=;sa#4%5u;!rl^nzL}FWHa@qj8=+~+k>)5? 
zNuzgWoMgb@KJqSc^v0IxR9W2TyN`kFX4{b>BK$t$gRPs}&TWg$DEsnNX3$43<+ps5 zDNXbJx#{t{$q^AhQU<%4>a?;x;BE=+<9iQXJnYiOx``mP&>D)Xv&rQLcf1C~x!S+| zQ1a+Tv}^z`QxfvpeS+u2%3|tMVQG^Eiw1caCR&@KV@t4~%l9f%GOk&0i!yMudA|(P z#PhYYd9*1iFS5Mbvhx+of;WPRz?xiJEXy{wNT!JeTGZ=F`QFPm0;;qHschB6ADv9r z7gt<;$j*O%(ypL%kX-CWPkRqmwP$(VVU>@&QyR2?HP&LNjd(ZN=997qRiBmG!4p*< zG8ZxR_`G2OolCG3Pgmzg0OpK!5C~NYc*0n$&%O$jRk*@pK!M4H=>8a`D%)B_ zSI}VN-301`0jC7rJhVzudO5-NPrD>%HzVt54I)^zf2>i+_7vsH za&~_G26#!g)qMSgX$;SnFIQprL^D>Ttv(8es4)8xkanwWzM3LgL)7lcrynVD4D_jLq){I0_vC%0^28dXJHnKZlc$X5?rMVsYh8!Q`ZsDbz@|RsB9# zsN*5W3v(^*@Bs`&5UQ^wV97_wg4g53jWx?utdej{K>*;Xuc6PIjypJ)gWQ6b(VaR~ zOinCITx!`jiRK;p_rsg+P9luWRVz4^DSS*30Uy0B@NG#==>a7AVvTi>j8Y7E(p{?1 zXT|lCaW@$!EotXg5;A)|U3D)4)yFUyFMBz1JgN^Sv(Z%UoB{3E5qWJao;(O3cticC zB8%*Eb1`nU&WQ4fKC6!9>gvN}RNuIPiUnp8GJIeQn6^|=?v@1Y9~Bj_zepa$VG&S< z5g2QkBIU>dwN5_#;2H&0D#_UJ0l4){jK^gwM5|0OC(y%2p!#&pmgF@7unP|if^Z<9 z47ag9GF;_|!hkRoVJq&&Mi7p|6@_k=iKhVA>U9bOPTGAOiBha6VJtT7vjXg-cbf}w znWjrJ0W;;Q^)kXxikHW08DQOy%OYXMV|44laS6s&%eoXB1o!F z9&d_NrobBkq&IH$s6zttD(ha-Siw|^ zJ47I0N=F*_yD)!bM`w|nHRwL860maiN+N|R4;9>)F{IDGrjCnE$ zG@ffSw@kzRojqSaQZA5wckJ25xq-8rjGxvfhLhX!sv@QPEce)2IL_SlrwlwF;yC#B?_Gk82T7B4FKP>(BA9zVBiQX%B4oUEB%V&y(NEPm(Sdw zPg5|Ix<7f5a@Xs$>44lp2dM<8V9J^7oMAF0aZ;wlB;<#^rpM6$i^va``g-pVpB}xI z12W-Hl_}59{DK?BxZ5#iyuQ+cdcDpf6C=!#kv%Xm;D8oOCOewvhgop0!@m7e!)N7v z(_^Rik8%q!c9Ua)^u<(J1bE0L*wn~;*dNt5Y!z|u=XBh(OIsV8gky2`Ip!yCx&t1@ z2A^wwJBW8r-fr^#s?oPG^#QBT{OCD8Buou!JgSnz`BQ;S#U0+c`3oi+FYc*o zW$DTXW8UhYa`~!g0lOHjJ~_ci^Eqcs8tfYhu5M)Z@1st)=A8pWlCvA}L>8`}8RV^! 
zZ92kEM>&|y6wH$btMEgsCR*k<` zQ01k}sXwWZ3gB2wOhi#W;0pxca5sV7WcTrkg&cf|^Rnq8nG^DPKJzIr-5!S*F#sF_ zr<1BnP{F1p&1mtqaPRl%5QIE1!*=`0Ekbp`!IZ2hpxS=#b&3@{GHQWgKp7D!9H$_)64DMem&A}64B(Qu3aGCJPT>gj2>l{693VZ;((tBx5{sms9Ht;Yc zabt%VcOePa^?8@4JLuHnBW-Kkl05@X#rZ7{3damDB zKsg?`_4(HyKy&C%qy2*=@eAnrsv+wzh1TnCpoHGL{vqON2OmCK+3^Dc>KfCMT31#_ zO*riN;V%!{j`2@6gX;hT!5Q2J zxr1ciz4y6i-?hH={k!MSu$bxYx8APq>guPSuJ#p)Ji!r_T`)< z>&!=%bd3ik$Z&opfL^_&;$=rB%bp?QiFOoLS`-9-do~}u{Vx=XTj_|FB5;v^h-tZP z@AM|Wt{!%xcQa5|`>J6fKtD4Se#>2=i{FN?M$6X9)=mkQCEBbqW`ky1C|2S05TsxN&az}Ndo@6z4 zm<_Y>s}SB5S=A>32}dH5;)Iw74%5e#Ap3!tX!Kh|#5JID@(~3yf<-?hk28iW9s$jme4OY* z2AphP1xc{MIXAmKCLzhK9K=Fjk(gp%S7&I$;8lfRPTNXPkcmR!(Nz~c+Mub%+wfxr zx0IG|{Nn2MmD36sG^}=J#?v>zwpi;tl#J>HWZ8}e{2qq|AsCie z#yG}|Kfa^kIS2ZX9ys_z^|x8j^I&J<58r9i_`hBJ6#EqcZ4$wB{FS~YkhDb1HKnLR zl_`H|=3znvq!ty*8e3m|V*dEZw~EN-wXF@Zezq~&j4$8i{p1J$N`JN1DQbxrvs%DL zz^>4Ix_2ka9>QhTE>k;RABrJ=$osK+f<=I38^R?l)*Wc@@9@WM2e?Ft-h&-O136jU zQCyqA+DM~J1#bQYlu0q?4!!yZ%`G` zVrE;H-lr&wNy)F?`;<^t&@GfF>k4p}AL-pKH}V9dt003<$+93xBl)R#zMy)C4sJSN z=Nv=9PJLgZw-7lDeskx9_s9n~L4lR&i0 zX-6Z2O%J-eeuAS|gEPST=ge0R*z-TqVJUEZ#%$yzcUNIs7YDMM!+;u$!vZH}PL@F~ zJUDZ%zC=8_5PR--|CDf}axKP&sJsqMJD5r0q@u{wL3|-6DfD2uyUqPEFTOsW#Rp!%j9%6e?tjfg$j1v8sarT;_iq z^M4rs)##H3rfw5c7E%1SLJAijPKepi#F;vkK1gXy|DI@&wSmW+SRO4`snw@Dv1slc_LN-?l0v(cq2c0cT z!pCznhO=SVw_V*byi|zH%^#+fs`Gbtil*e`+s90Yq}CzvL=Xy`W|bc^S~k+#^hL0<#12B@76G0&)P?7uq9SaI^joSwF~F<52!yu-5O@s=FKj;xx?tt~BAD);^jx|m% z6h^tG4*Tao%)N~6pZLC$aOcIKGI`GfKhZh%JlMcsu`%X1}OA-r*+JPlsiUv5tmt zc$vu^BGv{NIx(2`tl0eYh7B~-qc;WAqeiV)BNoNXcxj^50Vo+1Y$tv3e77kVO*nui zCF=AiEr%)#uJc<#0^Xfav18YkP%IEN;B_E?f1UD?A`2YO301KrX$37%8Q&QGbhp(n zPib=AT;DOuk`Q2tPlPEE>co8APgRnC zHY71RjT`62n!c9VI6(U3d9W=ZX8^dYQ(R@*sEa%m2^)J;S>2ZHq;@0A z4A*@(N3GV6JP&Ug4yT9wPBhXl#u&xV6{ja>U&ScAQ6@Qbl z;HbS*8RA3BI__uTM?>|9^jUzXMnFqf0w&Mja&te#y(b4{=J+pjeUv~hYc~t;K`W*g zuw-v9u|E+R+>198q$N0)W^`=alV_Lp`)>-Q#aPGPK^E&=L;nF{L#bn%GSNMp$$|i3 zk2L&4+O|LSS%UTmFkXl79#6_MT^wdtE>?DueVdc?)I|L1e`i?$eQ^As=>Cs7NW4ko}+TlCvdXzU#hETWp 
z%}dysZcV+826dI^o!4V>=5tz-H&|g)Mp)xAcK#d-MtW*zTIZK z8F>p^`cg`L-biX@gK66*$f1BoMDcZ6m~zTr1k>~rWH8%=IVmHF zUE(A)22+pXG-t>8tfg)KLQK-Dy=p!C9a!Vw;j^y(Y2`Q0Jl@Gla&Hk%FpJoPD&c2S zeeS!Sb{DC(zDX<_Yj3$SU^$slP(<#Sc`e|zOq||u!*X-^%?p;??)qG3wKuY~o}Npd z;ePWZkLbddyC!~hV(kbyKe}4lNEbeyl8#CB68o-`zWPnu@Y9zqL+_b5>XILaIMT`nbPhdz9fS4q1y9ZED;ZRR>?v*k;S8Gxzn}nWumWrdCki0BT-%+F z8) ziY4E?F>2(mH^lwUf4iPFZRF>1f?Y4GVZJKyuEdKE>?Kv=qsrfVrA4;NF_c<%Zm@|1 zznu9)C)53oM*THaLJASenwHD1&sVBus+KOm-vn4~sNa<(zebZdCE(XHIizm6!ax3I z*g#*C`|{?JXRz#%{8*s)G3`yXG`v$qj|-|1c#6|msxRfl;bwgmzk=O&R9<}FgCBG` zls@Hk^Q4@kalNN$D6}LbPH@LYd=+_%*N@PoygzGKcx$E3`Lv_r50t+Y}4~k#3Er?&y)UVH1hWF?|1avRM@fY+b&k+LyD0IIc|wL=TAwYnkz2y_-P14C3oH zKc~axLihMqo157+2H#-~91t~@uCnM3@on#K36qNx&8|mv_%B^*sH95vh2{G#M_y2; zs9bI}NP}H_c=v3+?AW!r&@#uEoRM$@ygn9o6rEf?8Jf>y9digcg= zPUeB8Yr>X9r5;8x9e;EGy<(NA>3617@<22i&4>~+!s|$mR;I8m7$=_ivE$dE-ODxV zY5&P1p^0t>=$u_l#_I8ryt25aoG|Xa%^?b1mO?;~kSx38fgXn>Z8?Z)cI#4FwbY6y zj?Mh(IJw)?W4h@m4DV}Io;=-dm@WN|;?j`w97e;8Osf;~H}JdP=If9cW1_Ru#Q#aZ zC^FRcyJh%(<)iBs<}d!~bbee%w-|5luSOO0ua{T;UBPMyN!G44To<=i+TJH z>*~b#PnF)xq|as)r;K0Mo5Oo=&CTeftZvCwl6bFxynJgE6keDPjhjMibr*Ga7zx?d zcPKb*XsWPvzunB0B>kSdP&y>lvTJJ?m)`pRFdhRt1rnmnWq*99j{}Q)6z4sMC0$es z@IQx)*FVjzSko)Lx{jNGj3pn*TwDA9J4si3^OFBY?=Ug)20E#nYSd30y0K&YcbgWb zNI1@?rz$fwrAHD%#6A8y?PiW&`NuH3ALJErUP8AOAL$0xrg zp4G<1UEVojM@C^8P)9T^ib(o{^ees8-v)WiQ9D%SqQZdRqC0QvAt4X@t|zab>$y_* zeActI8-iK9@8Fv(mQzUe+V84$&7_y5ZR2UL%Za4@#;)7;IxxMgr!jBgr>~>JH-jyD zC6OTLz@a9!mxB@cSe&-i-Hzx@_oOo`Hfn7gpUcwrq=yZM!TGmQ&DUVZ_BCa{yuoFT z){V@=)LD60&ge0hOeTqn-4T3(?$58Qr|zLGP73%4yAl%{HRXXfq2jNs>;>7PUQHBW zjO(0QlFHU(+}7}3dVtZ;65#2IB1ZDiZhnKtZcv+6=nwokaW}71d>ll#+phC_H>G2~ zyZsXRRi@)??7NMm9|=1cMW2#;rrE?(yO!?nC-Vn+HrRb^wHmVwSke|cV z10X7on4d`?8aFr(Ri>OZqi!g>W*SpY@jhaKjP-ri_WOn6dvx8O)_1p;M$x9^Es-p! zs$172du0{vz81oJWHIyM0oph|>2E`n$++ZDl5N_iH{vXw*ziBw>a&KLxm#lmr|asi z7n;SqN(gc*qp1$SGWWRNC@Y{Ne&hPn?9uGgcmto+{9V5+Z>aM-&Mf<@yUiJ1ilYY! 
zdM5Haic$(T_CEiEIP$#c`m+*kf6vp!#ZK)OH|zIc?KBGaQbmYGe~4B5=QGUJrO5Kx zhbpDN#C>13D7|q%)EIx?F(LJ`JJ}w#T~CB1XNLVz^LvbK%1Fl&5UZM>U7(iPt&5ih zOO}y|95rNp6Bos=jNJU~eK^y1GKioe)(BXfv8qFJnig5w^WOHq`MnKYb)uMN z)MS<)#)PyS!QePag@?bKL(x=V>;BmQ4%0N1@DWyO9+$JB?Rrd2ff^f8WbX6UV>+h` z3>tt&_&6*F=Rt|XoZF;b-Mrf*Kb7BQ;@BT$^#!G0P9HhE?!7e$6Q1S%P8=WUIM@;P zi;2#)et(JTbZ)cl?-iZ_xo4N>hp{iqiQLNEEu1t%IX$n?g-`1t`jB7PNIu&MMa8Rlnt2GiFs zd!B=nkhe=@^!O@>2Vtj~;9*YaSNe>Ee+Ssn-lhqA4DS9@@@!?7oZyPDf3e3~c<;Yr zk7U#?VaAyV59{9Dmh29NI*NdOd%ugm-IN&d_$4Ol^wBYPiP*f(F7*)sW$fI`5X z5q2e{L@bA$Xt}T5Zej&2cmyv%6#QI=_0aR*&VnH?|Mxrp(Cy@8~9v`KW0(4REKXz@0*k?Cxo@88howXgi*7j%<30!z7-#?V6~P@`^P7H$aQ!|~u>%h5&mYLM4wr;G ziITv17Iim_tiQ&Q7WdubX!yu_>>c;Ig0q03&u?q=!d246T4GJn6Zh}2_<%>r`{o~jwJuFpt5fcpg+??7;si~Tp`Ih(^IP5eb zN$>o>V^e5mVz6|l_RHbju#K!q^&%{@_`c`4;J~NHJwXblwoT}kkr7`aP=&BDbW}c$ z5aGMtSH;D_D32!@<}8ns``!bC(|USZ!i{%IEsF_m^UMQ>tfIei#&NC~JGF28M_P8F zr$n9fqStN&^1;2U8^~Gy)=2|+m$3Km4)b3RU=Yj{YUI0?!J*C+$(aT|I1a$6p8gPm z@+oxon)^AdC25*Y9))~xZ*1T!kp2kg1jxbx5bA5Rjc>yRb`V|eFA+qI*)0eUh7RgU zqe19-@RJdFy>-N-7+5Vg8C3Es^kK@9(F+24SWU;Viw5rP*>9&P44rg3!2J^>Ohsr= za7o}o4rRj}y_Y{SN`Wa}qThjO?ppP6xW(?F^SW zU!Q^LBJGJ(?BuU8+MQX&I+BU3y~K9@f|}_>DxHTvK>a3e)Aun-yEQhk<#MaQ=q(2B z7_v_Y?nF8wsYvqIL}K)uKFIDI+|&O=*S-$VgKK(*VOhS4=k30XhK-7RZ!@xJ3Ij61 zi^BEH)lb&zbF!H72OZ^t_o?+bb*a3!xk`}XrV;+ngllqymx$O_cf*9P6e|_&M(>}~ z5&t_di5dG^L_r(|UAiv($Mzm96_UidmrMvU3pR+jJuO$MQ0L^}RVRy7F zIN2pwP@~o~NDZl$h%#f_4N*PQ*dyGfvK;rF4~qw%RWxm@pDcdBkz)V1 zdsrT#hAAd0Jz1S2z7Q9l+c3*k4~g62@3Vs0hoB)@R>E<1^pth$xNg`k7t<3QA|}JQ znCv1RUc1`>0v_xG{1Cgcn(i_buo+uMS&vv!iw-ck9Sl)pPKvT>%Bf2LOL6+vfsz`8 zyOIBUrs9d8aG__0_zcTK;J!Krm0L6LKO{^2hrApob9bTbTQl;U{qy|@&ETTy66lE; znR=P1jQ=lrQt!}@w_S1g#X=`1_W`I+LhOZ1zY2L$@OCVa5)c9^s0RO)#(l>!kd;zc zeu0QC0H_}RY`opQ>#x2a&`z|D1`x&S8>U;i2cK^h!es;M-%0-e!i>Fg*-om>S4wTSjN&r;Vsb&dQR^Bud4OfP)pKlX$+_~!xET^KxMK#p>qkP@7Y0PW zBcx_DEh*a{;gfo}WJ=ru8Nu>FlmoGxEcn6r2f&X+h)b`&=0@`9N*UI)XK z?6qrt+Ix}OTu$fnq(2FdhRE+opAC#NhLxS8L5R@Rfyxmb+-CFrna`8KB)kkFF@dZo 
zBkh~Ll_aOHuFs?b49?Eoa*3K5i`7f|mO4#5dT}+cR3h<$dz?ms z#88aXV9JhVDh@_4Y`z0;~ z7Xu8ViU{kV_EWF_@;KOG?W7Vqb`@W1v67Dqq%8-J=$!G(T*Wi_IPT_uz33uJd7i6H zi;M5+@bZk~dDc012u_f;XvXgO`$7ON@QLHb&*3w9cFUb?gQX8U>1vzVG2Keq7gY8L z#6FE~0c{LDs+D`~KQw#CDZRs*7(R=V1@^W=avE=n%2_axkQ1gS<3gp*j2!4rmYzdG}iez1{PVWT3BxDK)rfKHdM=RZ&1uDm51>*$e z`@BlM-q}}HI*>#=!}q*QXR2{mEKYZk7rv^%6(jM5D!Ofr3QPV(rCn4s9@Y2Dq4IL+ zko+39C&}smI#p!&O7uucn~&NVA$~q{;>J_|F%o+RswqUYq_vfYSw@!u$r$O6c>Y+t zpU``9z7Ev(T}T0bTPv-glI}Ak1N4-`OgFbv7F+Ffp2uPW;13srvAUDZIjOh`H0Ar7 zsN)|&X$m@{dR>ZVG@gO2JS*CEZOIc9?_|5tw+*%xfe9PByC?f=ttDfOom)e8_#8_g zs>JdjSnr$Jrh-=)(q8eDumnFQ`$gA^^mUUtnF|qcE`I$@UEV0R5szo7-6NF5lkCM$ zQZ?%a-BN>Xwk$40%+rd=kV(AiGTL(6KIJpxNZ3OeCWWPyL@rSFNnfT_CNKwnO#E=M zS$c-%&<}^W;F#MQy@;rxtG|yghGqc4+UU*Rf$`8V25qlwxAc1A_Ij{5hebh_M)I+( zwDL=ow;@rof*`RDa-IxT3$Ido1yF46UWu?YyK;=kdT&*pI|P4; zC!@k(o=6O1Fu@7)6i~Ohg|_h3ky5hb56s|}+`M+YNlivD=zXGkUn%dKb;KSz;h(kw zeHqGPF7RXT88;;KS$@XaAxO*F(}K}%@3lIWcL1WN!9)wB&R3-9EM7!z`%Jvl;d`Q% zMTZrzYE@icQac5ym7Ahz-a>oM{_F_scg!?vbQc2fhPxN=(96r^lIw9 zqpStNNJ52Ecqx6jvWXpAQOhL&=UHDWt*_@w%)~PyJ)@Nw%1$2h&sB-%8o8}d%%5AE zl}E@PPe7!VX?zq3i>1;68Xo=`+m-I`T#^}X-%TwK%h1&I(5)MLj)$@JPFB!`Ty(lF&@Vrs> zoUaCBywhrl!!mG9T6XFK!m$zBalVnh3In^20iQ5`^|^M=e{B`C!S@ef}+v(3Fsos-vHA zz;|bcAF@j66K*2Z!|I4rx7PKkH9-q+T+8129}MDR6{SGu_N=c&f!7FUB- z$2Y2jcxe}pe61j+b6Ej3&J8?B6WP_~N*TjjXeaX}%BAn-G{44ouL~;|`PYhc@;aoK z>`?4t?av0<4unc%PU4AdlzqEsQ)%zp>F`(22PL}S$nCzdSRnn}WHB9!AIP(imoD#U zCy5c&a*pQ)DRy2+T#_>bHz^m+rY%hnO+0JlkpXo-(}+R`$V}oLiHF!b`@Maemuu&u zFVX;rVfA@gazlHOa55TGqyZsuVZsoII@cfnStpSmvWOf0d`ZgK5Y&B>d%cC$Goi;1 zWerwiuXDH*j3qssAph1FvONCYlqG+REir6a#0t;0QyBvN9OV8kL z9gVtEwu|LvjmAq`zsheaGEk-pTI}FpIWBCsv8xD(GV;*70+0Uo8j6?0P1%Q^O{KN+ z{^GWcu2V*@qdC%`#7Trv#(_ zPi<^$D}yKlF}ESSI%;3k>Z4T8%v}?PF#JJn%a2<0Pp=X{q9s}$y4q_B+4J$MgRv*p z=ejVGv>Q^{fG1V^XT3*3cGeAC!s4I{!t#~(ega2^Jv(JX*U?@_GXPeEwdY<{O**?+ z8{KOAL*$0O4kvC%rqFCgmQW6)aE7fCD3UZpYazMRNZaVum8Oewwga{P(W@3`*sS;( zF&H|D=fg1mnMUC8i&i7Xf*PaQIv$GTY7!NmBYWAFwWiH3@)Fedr_WJ5G9XwDD!W*u 
zE9x~*VX|@jjPNqk6!&NH8Jn}t@#U;c@?lruJFsMb~r6CgnjMox{9NXokgD*N8-{J~GH72l{lfK($xdMqAKlQ(RF; zj`D`5<&9wE4ZC-2DhNw%NH;>BleoX&io(h^1%C+VEdAy-Q}vM1Ut8oD%>?4PsS*|JHQ_D9!0XUt*S7&#T zpK7F?uZ_|8md015vXveu!vVqq+32``x4zqFP7`m~@y+*q# z@|bjN+A(yE7rHJ^wK3)%UAIp-R@}wCa6I?W#1up9bwn1j6oNuR=I6peCcsY`dZ2q$MmDaqPGD@7HH$q;O?Yjn|c zlpb<*UE?~kU-cT9-^`qJ#XdNB{+)743#L4uNBdfDX8p_rV}U~g2J&$02aIhwI43*- zj|!6QuhTE^rkLQx^mnl`*%^HNdD4d=yg!%fH#Wi5(p}jAY8wSoA*dSF|VYU z7Db#Eqmgr$a*Id3F{2l+cSmD7!&#eokXAj=>8EHnUAE+p-H_T0Qqo6;y}vEExlXri zqP`?S#(`j+e}xr8w7VA5pTxV=xyHVzeJ66XKy2l4k|I7@$0}$uo2Hv*yv#$Wj_cU~hBnu9A$P%^=uTnj(%yBQi+<@v1mBl6P&cG$ zZ?v!lao#WU(MFLa>0^^y*(tuw2a*jO!HO;IlX&mCL^Hz{Ad6y0_5(}tF@;b=`!`z^ z2xkK|y=m`8pCgbPW~a-;wM5gDbXu;r)QhA1b`PoC?LD6EO;7#syT(;dEHJ2y9<3wu zbV%EDeIDaR+b?6cWvo$4>ve(ztbHVM7TzO!wmI;T!wzH7^DOHD87&edz6FhSH>CE% z*1|HcvJj>+FA3k9#~;4+j|BqB#k3a74+Z&|+_qe{0%X_9uN=N!2y_Z-SqIgjSOrT% zaI_uQ=OeDi#b2K`Pr;spB^a3RTTukF1s$*>(1qd)wr|*2LKA>pZeHtG#G5F_D{Z1V zyR8NGx{6}gNbDopEp2rjFnfrga_OBb7I!h{u$5PtRg08%7Vot_e;pV2Vomg*5xiMS ztSN+y-l~JJGz`7^)&QGi(DQW7&Fq8k3Ig>>_G;;w9{Pb~W&S$z=TftJx?-=921`eW zn`)e^(ls`vE%dbeBHq%fHo;0QkwN`)>_ImAEPY|Y5&}nlqxhlp)09T4n;CsBW?_iE zoaVYinI1Mz`J4Cgj$79BVO1OP>sOTBisEix>&>27jh`=Nn{Bx{D~k{ktgrnGHio?! 
ze_rE|mx^Y#Hs$*l_D{l0j?M8f^jo*+a5i^W+JR=xbyEw+`vRqv#|5Z?=?$;^L57jV z3+=c`ZN~){gH`RxKT!x7aIsrJwfP>Ny)wSGds~XNp?D-#&*ew*&9hyW z4^}2a_IXRO%Dg~mm^E4@@*rn*-!0v@FC4fR5Ui8w~iiVB>vFo{x#t(-y3pkLUZ z&h;?^1q$z^qByH-Op7~GP%j(6PD2Ht@GB1L{`-;V8sj5gB#?TwYmRk8frAJMEo}j1A zn&HSo5^=fwG~4DpD!*`kVPvpQ8eS~AJch(?t?w&%puB1{-tir7;v-k^W(k5 zm=YFQNV8mRkAq0g_b`t!&20&|!=j`y#0Dw^5>n1`??O~ulIjMy(8ry29LK;_ax~+2 zK;UjJm4xx|9|V#YSamI9i1B3;Ej<I zHxe&>+M*5k1drGh?*$hqnT%=7#5H%Kj$_bzAkV^WWC;`gD@l0^914Q8J^O9zNS3f8vaCOi^~9eY4#m*eb9QOEON)FnTBGGW52ngCA_NYJnv8pdF)<1hZcTG;1& z{Lta$HmC8a7qfY2kQXfe!gylhO>}iW)6B=w<>H0%pI+l%JO-kxt`pbz5-(LuNA67p zzqx>%DRW$wi4HvlUR2{Pk&abv2{-KBC{j|hgJsVSLLmF824|l@)ZDQDeuqGgVgDIV z0f?9Sh!*xz4bR^47zT6bdhS4Hy``8ksXE;N9Po}R?^jLJAx|lmJ}e3KXkhxUW+^O4 zXMj%CgQ=f^KZf}Kxj^S?Ww{K1tfGl4_26StH_LzAr(8nyA|@$#4r|&nf%2FHGCnuOmBP@y^r1+kEjEvJ2zAowYK*840#Gq|a6 zi?GA-AKM~HU-wq*z=6JR9zU(zRO>YLsf6UhQ*D|y&!7I@R1*fXwZ@}*hDWLFw={c= z^?N2mo+41`fk%BfDyV9>7dE{U18ht#S1vm``W13s!Zsy|Ah)h(-Qw)LWvG1Gak?=A zkL%hDyeqNuX01+F)fc|cpQ^)8wx3n-628KG>lUw{{!PfZ%&ifUJ3)^<)9y*X^~rsr z=&ee!>#GRJDne=Ce&bYMRiw>}kVz)W*DR6Hj-Q6-SfIX=u&RBziR7==X7e72M1|lM z;g^Rk?||R<{nFJ(Hu`95@;;RnRT%D}r&sxKG*CHp>Gpc^Y}suZOXaPyy*O^M1#0s9 zY09o~x9c!9U$?cmu{*H2=RwRSI|pDg7%Q^r;pnPx5V7xLcn%HtLc{(vVWZa1_rCG&ChrJi0uy^q83r82Jj{4SdUmi-t)zB7 zd>YSjdCw2h9iyLMpFdNcnEe_N-PWWpU4=e)!KydhCVzN{dX)KO`>E;NWV;7d01trw zvpN4JIgyEpCbKU#d3sND;hD;`qo#=9#b7p-Q~$=T6kXC9ESrwPv&(+9*AOfQ&7*jy zi~6!ih!R{rxxUdQD`NFFQB9 zSrtQLPDcEoX*gvrSf%j-h|(HPjaiMz{CSU4iHfHOXnKtPR?Ul!TLfZg=D5*&J0NOb z+et)tD{yOc>mhuKi4VEo*XPA*ppSzd#TOj4R)j{Px$$K2FA;p!G-CLUg4cRs+LxcX zsiRa^K>Q+U`X@ms5Ag-*soQZX-Xa!wR*^pDq85iaT0Ew(b1B`H-Yd2=bI3Bgy{f`A z73?>L+2tX39FG1Zb(}=k2Gb+VF8*s$bRF|am{{yO^Y*CN?$)+2BhaeH@cMOlptC2lZSXlP78>N0%;JtBT}PY(DR|=!T&#IO?riXv|%P zUpuS>><)78V(z69FA{){t)yA3#?(L}Ppy^hlXndW=Z1M&8c{FKuK5(oo{FHe^E4xY z5n(fC^r-f%@1}pynNJ6xygW;u3~t2*y+=>qKfFMx|bpM~Gj*F&~9)hhSHX87Y-HzoZRzYeYB$q?B 
zUrWMmUSDl4eSG$Vt{VuRL_uM4pm_peC63fUKK6`u-+DblkC`UVct!s-R1-w|Yzm#Q0NwZ4Z&H@qnror~T{?)8>aD)DBDU6G8rng2R_0@d1k5}cze5ntPwJo34#L)-Jw5=t>o z@Q{$lYc=pvYrJYSO?`o8EL8l`U@fSc%$bvF-Mf5Al|{djhs@X7OWr1)(Qsw3?sHG4 zFGn`TlHjXi8gYQVwlhZryIIE@|Ja?YSt(zfnedH4%ODz&d^L<^Z_cwvU&UFrshrOY zUkK{0J%7>e%K`GNhe($5HkyU(0vOP;T#}A$j5{=UNz(gHX#~QWDfR^iHncl1ep|oU zs*a zW$WYMMUMn0l#Z_PP=Bx6Kljb&koQzR>|DE6Tm=kmL8mW7(hdM}>7(n1r=Rr5Axpx( zf=7Z;V*URvxCX;{A(QGOF2Q$bs`L=L^_xh`u)dED?h)tTU3wmn%!&uyiF? zcBtQ@_+E&qWx#6@YrL#di#iTApt-kxR_+VBXxv_0cymN(G;9NE%>#UmSpk z7^z#hh3EOM@nxmn4AHp7Po=O9&nC#lw9-62D?O)SDW>^V5DcYG5x7XR^r2`xwnOa))Lo>pi>QEj0OG@brkPaX5b;Rh^Mya&(l?SL z7mvP2K$v@GLGHj`us12}a5)y8KU+QVI2({$!i(AG10h~9^qugxUiBP`w1%mj_^xdp z)FR6D5Psu^&aRNJ(b+!PNLs41J z!Pb1zGMw$IPa2pXiB(w{zb^7p!f zf$-$-BaE=OFc;Pl$-9P^GP2D%>cbMuvhCZ$H2>D85u&x!08KM#7V9eh0PPt0x$+6P z990LUYQ4_mp_JO!HpYr&F zpa2i=Ub~GRi}MBj4{1Z1d!pTE8JQ3T^2-l3UyLhyCdW_xz`|$0#lG75l=!Ofh?OvS zz4_ELIeEHCYUBdaB{8x96`T9)_*xkIwD^|A?Mp9uZ}d#gpFSfMCuohg^||sbi{6)B z_Aczf`2f-6^0V~e7&%+oYa#2w`2@)&6=!YLZa>RBXn6vzfuv7=HDZ9uPI@T@ zPv5YmjOF-~Zt$y0|M6z-!TAE2i=q|+VRBw?t{qWl{ay>}^H}J?$%HKQ7?mufvwMPO z-X5k_?MrVo1z_*c{WGfTwjR6hfArjDCumPZ>-O0|ltWevC1ag8Vj|#AnQQYO z6Zb)?o63EM-Hlv~enmZ3R39_RF=-vTH*7tvTzw{Q1X4$RRQL^UHp>BR9yHz52ju?U zkr&W|wDc&9(=>{!vF_89-n0DX)d{rJK7j$AthnA&Ad~hnG_RfYkj%gPq8CN@(7@U5 zb>yB1mB4uKX-W@jyGrn26JT_mC&@90)QB2%nAY3o|K`e=UbV0P&jo-X1=utB^K`C9 zeo#1NG}z*~m(YHj9M9H>l`131g{-YKahi&MY6L<-Jwh z*$)}^t({R0=Z0s31N;PY?pL^O43M>#l`N1-15;jmVJNl4?AE&Q*x3yQB?uFi!r>Gr zqxYn|%k7Brlmv1TIbx;R2KG)Mv;F?(r-Sv?d)*_!SE!ZHFt3ZjA7@?{*t}wBbZe2+ z`A+;T+1WCT{vA~&zW9RF=*Af~sL`f;z+tCc5pPflx&26f zsTJ*a!q;>6;K&R9dhXY7-FvzxPGNWHZ*fYkA|)ZM9{uC2i#YiyC$r~>^Zby7Ub7tg zvjlQ=4{3F5JD+|28Jcf0jf4myXm=o4=gD=AIxgV()M`Ix&f^!-(zMNS8B;v)<&WCUW+5I{dILPdVwA#8Y zj44!?LK^y-$+>PYoJp0yzxT-^l`g-~xqg)L?qVEIXY9e@K+97+5SA%1RX7k=0JXX0faa{Rxo`;^1!-%I%f zZa6FHZ|iMLJ`;WqD!a2ykpg%zxXD?6BAgyDzW1zf?l=S6QAC z;Iw?E<=|0>EHHcm#R)4%6~uG)586$TpY_ep8rwr|ETwWX7B&Z-rFVQe~k10>&AKT 
zRn)Rm(20l;uZ3I~59+PEp<0Ao&6u@!1ApB?dGcS0>vw$gTaG#x7yomCe=yfMANcGd zA~YQLRCQTTS9uNBx2|S19Hwt!zlw?dqcGQhDl93fsw5;SNn`%j*2V0fYw`Y5EhRI1 z3s*}TENFh6G(0pMlD4j9&h~c?yr;UW%S-*=Iqa!%99%23d{(k6G*8YaqOQ7@ess0p^DwlS%XgT-Ba-H5u8} zd4LT%q=ffyA?d1D7c{+AH#uzq@ttghQ@spA>TuP84|txbL$JHmZtxhtuj>?n1Q#A# z2OhX&ZjK~>CWA0`*&>>lSfJ2Tb+XGI@UCf=wM~`5W#Y#F6CJOG*KL+Ae zgs&P^-K5+%NWYH0F>{AUViMfyDbwiaS&9-ICqbf7{~AN4?MH4|0@B@&2MBOzU%LdG zov|a&1ufIyJo7A8B9J~Rn&jb4CRnYm|1iJUuhKX-;bQsP`D!WLQ)%d^8Y`NUIH1fS zP8R;Du3e(#r=1dtogLkYKbGfh(;zB8jVsX~UQo?_hV7v=NkEz|$aecXM+*_$$rHU~ z%Bt3HnsC*2C=#nn-&G(qa0ke6)e9+yRH3$ppSzZ^7%?Naw{2bE({0#U*7>37K@Pv# zLIqC~`*xvP!g<8vf%J-mJZ7@1dHH!SW%~QEu;Yi`U7h0IfPsQm13x=iQ__LN`~R!7 z>x^n@YueJA0!nX}(2D{g353vl6+#iDg^7pngO`0@8sR2QnpoA(- zKzbKYK%~AwulH5&yVm#PJL~*Np7msA_MSO&X3si1Cv4HwXfcT@G{U(qOg_XsPTZb4 zG-z-th*esMg6(FS8>6!i@)R}~F5(c!6ZN{x?%-$N;Y7`tLcFG=0eQ14tEUwSPT z6mYb#$4z4$=`nRth1X<$=X9*Sw?Nhfw(QB!D^0}b;wUgrs39TZK;jFwXE=kwS4k!S zr$U3Z=}GRBh-(wOD3d-{Q5-%+ts*&0ZyT{fF4qVThdp_tw}GxI&+ogbg-5( z^_?a?AkjpK3+1g1dZsLu!Z_v}x`(Ro={YPZudAsT$}p0&szt=nCn$gG5zJ>5ycRWA)6B^l{+X&dNs`{IX2Z*ry1>B)b`Q)hvdl|6(t|_3-O5D|mOnKYdZy${C4Kf$ zj981AZLF?kM?G_!+eE|Oz{!8@ilna!Hp=X;RMj`od4DTKK z>n+x3k@9=PD)l;?f(D6Capgq_DhT3u65q($Yk9L;DBHkG_%i@V z0lEYw!z5urMz}7ghU$m`Ot(h)y7G0kT;ntO9s}lu4{Fma;W%}yE>=P-5uEZRW#&zB zVo|(X@=8je06b|Z#V-XZo*{(f3DP}!dQeSl)TG>G5|UkJmZ>>a*{l$zqmi1GjJ}Ib zMMt1-PfN8L3DhTqBvhS)7bq{4^$Fziu&egxUhk`})UT8sbr=P5X+d-L`xBR1x40K^ zTT~7L4pwu*bDVSRa}}M&SA+TFOvLC)I;<} zGHbh?5XSx>}>3&>?x2oDYjIP)HkVfsSA+yN@q)&7o?VRmJ2UzfXNR(y}jfp zP!R>oEG}8g6YEvX881>Ow#%S0~42QYw?x%({A(5f{-Gqy7sKH4;B{BG~Yjgy_WB|k$wHqxAQa#QRY!; z%;U_+3u_ll#8xhFTZmYYnY&jmm+ieCZAo=Q*kRl6xX##BbUz)Re^Fc@HtE~AhLuZ2 zl#Mgj8Pwr-Xm@z`1@|Q>6vF+(akO8DF}}ieq1eH*D~nUr-J8)f6heVQ<^4K&lW7*+ zBHd@YvuRGVY_JT7v3mDzPRv0;~ z-ak*LM;EUZ)t;1>xWJC5vALU4dO7}bX;s2k^t$VMo}#Q`gkp>$LxXQa5msUsbx3^Z zyEp!2Xk%e-Xdg~0L;8$rkvfM|lhlktkp@I{j?9PjbrVrj-7Rj3J|w^O05h@bftqia zJeAaG1k>I%R`CmvXM(HOt4;lH`umZJp*7UiTI#f8JNi1bqGFi!nEf>0Xtin#YUyhP 
zYNTlBrI=r^sF>A^wu}ZmRbb}kU<%~-^)C3@H?9{xvpP*Z^GO_ReZOLL)ceI;K^MLx z4%uYkCHfZ!H`gTnaRfti!Q91?Eo6<~9O+PEUHE>&LfiR;m`%j4!$CZ0O6c>@UiFj~ zM1-AAi4K#_3^82g#Ydm#$j>3O1xay9*&^VA-hy{7p_RLp{i*_}$g=YYu!0ay;x!~2 zs8-wQGWf}qAxpV8{-W?T3&*+#_l_=ZYe(}x61Z(aQ{no2!b!^^qtoc^+4-&6)LD-% zt~X!a?BA4fl=Aa&YVbX}bI>I9$&I!+?0t##SdD=nqN#RjyJ02P9pJ7#veBj47307$ z=@B_GVD!YOtijf~Vtu|%wMNxw3MbZWMSG?73e%OfamR{c*Rd2{o8E17*L37GT;M~( z%A^JM^0)D~JE3d1+@0KgzmL9@ejmn{YQLC2L|fR5^pt!=6wkf=ob|b??(%Ab8OqAb zmd+*CKDH8+{~V`V>Z&!Ex16&a!XBGCk6R9`T1y*k9~)|%bPwt$iXmyGQ2dLLvp%Tf ztD#%5ihC>S!_>3a_?4{X+D4}!WRUur_GUa@D@-f5y*burP;Oudnh#BqZjMQ4E1&Ud z_dNW#tro~ov}6d*uGuA(;el;Vx9@VVr=OM|xgLovToO9W*m>Dw8EKjM{=@r-mpRfd zffoZOmxBA#xVpK-lpY=5*oxkHm|D^4q=%31W_tVxmf?5vvuaz|W>f@Axa?(&Vr}JY zUs_37$wXI3N~z6ejd#HqZvVhErO2rShLFRiVR37B4rAW`RhNDrxq8!O>BYJ`Zs}^p z;r*H7dktjRg+0W+#KcZ5cJAJB8V5-f`Q9h-hQMQ+4+=}ziNjus0y-O|w*fMT&WCoZ zGg(5^aMAQarQ*ZE-66*U+}7%oAzOUKU8mSP)oK#Q$=^!hugUNQ`16{Tn#)?nTKMR1 z(c=N-Tdf;i(JB2iVT~e3FTW=D7G>P@+s8Sf-?VJF^{zZr(w)d}fbG5CabK`sobC5> zS$~=rIZ@Q0v=efexo@}C9>25---Rb<#3=dQpgQ>MFX-+#KlwHE%l`kUiBHPVAGS03{!Kcmn|~IyGT%A>pQ1L*bkcL|5~E^- zr*RIm62M$5JhHXa)WoWWgT1(oTqiMiO_w=$X;_qhRu&pjz(Fy#^s*2h7y>0fAW|M7 zq0^Sqqug4Cs*z6fGHu#dp0sC*H*LwR>fXE0VHnE&kJy!46+z zK-7oc2wwOr4<;@w+wN(jw*zi3^%xr|_IF*X$Ul7H+~}7kB`zXP!9G6c-`g&vV8Ek9 zb+1Qsg{hdbo?O0u*ioVig(P9^NSreD;Hc$wjISt8KzZY*Bx1d)B#-IDANg#pB5s@S zKhCYOtEP*XIKOkLAwp^B#TMs=UKz5qMh6u{wi~=F#ZyyZT55ak;4+aZnj%FY_}Mn-(lkN5%sZ27hCuc znd)UWA3EMuVfpYaF<-2GW8S;jV2WU+%HsNbvUh#5P;eIWXT8gA9wh}Tqa~>pMl^?@ zhjKQt9U|^^$<0aJU9`2nnQUFmx^2Bnf2IKv#f#P=L2P|@Li+sk#+XHcdvcuV$*J@F?r3z#|`-&oVG6=Nif;_mm$i ziamcQj1|I49A1pXSk&?!@*|QP;>F2eb?dkcf#JYXB#L1?65%}PmLVl%V$c~l%QBaA zP=D}To}-Mh2KiTF-Yjlom1^hR=|e6YD~7tlRsThx*d{3}gz#u&8u?J$rPk0Q470^p zM&eUr>5jJHExtTo>&J8VBmHI5BZagh)}!Dp?bBf5s`UC@n_a=OrzG1)i3~tl*^llQ zE(&yjr_*G;%rv&BF^_uQA#RdS(q`!t`M#I#WV$?3T39MVb)&Nygc#HcXL<)=!p{jy z`&Hwbqq6xMZu8z-RGHXs(C{5U?f%J1J^Z*?ZpF;mDxU$nM!((2K6+=vfbb%qtSE}@ zTHN$Pw)QM}yoqetdep~`&mqYR%Gxs`tQL;GY<^MnO 
z)=8%MzLp0QDfe_Y5*#Y1t0CRvOQ2gWs82O;QF#!NWbxk!-4~&C9M&##M zK|ebY!s@ahpd>^R1Ox$LU>T64B@ie?IR9b(B%WAj=WIuV=xfmKNtSs!G4`*j5o^Vii0=xNidkW1Q2`<0!SEpdJ-c1B#H^n*YR-l zB!v5q&+3GLmxnk=L(w1^5ZVqb?I=UAS;pQ$1_ef=?4b}e6be(M{=X?dE|_Z=LO6d9 RB0`wJAk+c^8gNbO{{Y*J){X!G diff --git a/doc/rabit.bib b/doc/rabit.bib deleted file mode 100644 index b95d108b9..000000000 --- a/doc/rabit.bib +++ /dev/null @@ -1,69 +0,0 @@ -@inproceedings {paramServer, -author = {Mu Li and David G. Andersen and Jun Woo Park and Alexander J. Smola and Amr Ahmed and Vanja Josifovski and James Long and Eugene J. Shekita and Bor-Yiing Su}, -title = {Scaling Distributed Machine Learning with the Parameter Server}, -booktitle = {11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14)}, -year = {2014}, -month = Oct, -isbn = { 978-1-931971-16-4}, -address = {Broomfield, CO}, -pages = {583--598}, -url = {https://www.usenix.org/conference/osdi14/technical-sessions/presentation/li_mu}, -publisher = {USENIX Association}, -} - -@article{DuchiAW12, - author = {Duchi, John C. and Agarwal, Alekh and Wainwright, Martin J.}, - biburl = {http://www.bibsonomy.org/bibtex/241ff9a4754f77538c4d5076acebbf772/dblp}, - ee = {http://dx.doi.org/10.1109/TAC.2011.2161027}, - journal = {IEEE Trans. Automat. Contr.}, - keywords = {dblp}, - number = 3, - pages = {592-606}, - title = {Dual Averaging for Distributed Optimization: Convergence Analysis and Network Scaling.}, - url = {http://dblp.uni-trier.de/db/journals/tac/tac57.html#DuchiAW12}, - volume = 57, - year = 2012 -} - -@INPROCEEDINGS{Zinkevich, - author = {Martin A. 
Zinkevich and Alex Smola and Markus Weimer and Lihong Li}, - title = {Parallelized stochastic gradient descent}, - booktitle = {Advances in Neural Information Processing Systems 23}, - year = {2010}, - pages = {2595--2603} -} - -@article{Dekel, - author = {Dekel, Ofer and Gilad-Bachrach, Ran and Shamir, Ohad and Xiao, Lin}, - biburl = {http://www.bibsonomy.org/bibtex/20603ddb3c1f66680cb38f01683f544c3/dblp}, - ee = {http://arxiv.org/abs/1012.1367}, - journal = {CoRR}, - keywords = {dblp}, - title = {Optimal Distributed Online Prediction using Mini-Batches}, - url = {http://dblp.uni-trier.de/db/journals/corr/corr1012.html#abs-1012-1367}, - volume = {abs/1012.1367}, - year = 2010 -} - -@inproceedings{Low, -title = {GraphLab: A New Parallel Framework for Machine Learning}, -author = {Yucheng Low and Joseph Gonzalez and Aapo Kyrola and Danny Bickson and Carlos Guestrin and Joseph M. Hellerstein}, -booktitle = {Conference on Uncertainty in Artificial Intelligence (UAI)}, -month = {July}, -year = {2010}, -address = {Catalina Island, California}, -wwwfilebase = {uai2010-low-gonzalez-kyrola-bickson-guestrin-hellerstein}, -wwwtopic = {Parallel Learning}, -} - -@article{Agarwal, - author = {Agarwal, Alekh and Chapelle, Olivier and Dudík, Miroslav and Langford, John}, - biburl = {http://www.bibsonomy.org/bibtex/2e0e1d583d5b30e917e67124acbe3acd4/dblp}, - ee = {http://arxiv.org/abs/1110.4198}, - journal = {CoRR}, - keywords = {dblp}, - title = {A Reliable Effective Terascale Linear Learning System}, - url = {http://dblp.uni-trier.de/db/journals/corr/corr1110.html#abs-1110-4198}, - volume = {abs/1110.4198}, - year = 2011 -} diff --git a/doc/rabit.tex b/doc/rabit.tex deleted file mode 100644 index e2c96d53f..000000000 --- a/doc/rabit.tex +++ /dev/null @@ -1,94 +0,0 @@ -\documentclass[10pt,twocolumn]{article} - -\usepackage{times} -\usepackage{fullpage} -\usepackage{color} -\usepackage{natbib} -\usepackage{graphicx} - -\newcommand{\todo}[1]{\noindent{\textcolor{red}{\{{\bf TODO:} 
#1\}}}} - -\begin{document} - -\title{\bf RABIT: A Robust AllReduce and Broadcast Interface} -\author{Tianqi Chen\hspace{0.5in}Ignacio Cano\hspace{0.5in}Tianyi Zhou \\\\ -Department of Computer Science \& Engineering \\ -University of Washington\\ -} -\date{} -\maketitle -\thispagestyle{empty} - -\begin{abstract} - -AllReduce is an abstraction commonly used for solving machine learning problems. It is an operation where every node starts with a local value and ends up with an aggregate global result. -MPI package provides an AllReduce implementation. Though it has been widely adopted, it is somewhat limited; it lacks fault tolerance and cannot run easily on existent systems, such as Spark, Hadoop, etc. - -In this work, we propose RABIT, an AllReduce library suitable for distributed machine learning algorithms that overcomes the aforementioned drawbacks; it is fault-tolerant and can easily run on top of existent systems. - -\end{abstract} - -\section{Introduction} -Distributed machine learning is an active research area that has seen an incredible grow in recent years. Several approaches have been proposed, e.g. parameter server abstraction, graph approaches, among others \cite{paramServer,DuchiAW12,Zinkevich,Dekel,Low}. The closest example to our work is proposed by Agarwal et al. \cite{Agarwal}, in which they have a tree-shape communication infrastructure that efficiently accumulates and broadcasts values to every node involved in a computation. -\todo{add more} - -\section{AllReduce} - -In AllReduce settings, nodes are organized in a tree structure. Each node holds a portion of the data and computes some values on it. Those values are passed up the tree and aggregated, until a global aggregate value is calculated in the root node (reduce). The global value is then passed down to all other nodes (broadcast). - -Figure \ref{allreduce} shows an example of an AllReduce sum operation. The leaf nodes passed data to their parents (interior nodes). 
Such interior nodes compute an intermediate aggregate and pass the value to the root, which in turn computes the final aggregate and then passes back the result to every node in the cluster. -\todo{add more} - -\begin{figure}[tb] -\centering -\includegraphics[width=0.7\columnwidth]{fig/allreduce.pdf} -\caption{AllReduce example} -\label{allreduce} -\end{figure} - - -\section{RABIT} - - -\subsection{Design Goals} - -The design of RABIT was motivated by the following needs: - -\begin{enumerate} - \item \emph{Distributed}: machine learning algorithms are inherently iterative and computation intensive. Given the vast amount of data they can work on, it may be intractable to perform all the processing on a single machine. Instead, we want to divide the computation into different nodes, each one would be in charge of computing statistics on some portion of the data, and then have a combination step, where all those independent local solutions will be aggregated into a single result. - \item \emph{Scalability}: we want our solution to handle a growing amount of work in a capable manner, i.e. we should be able to accommodate to data and computation growth by adding more nodes. - \item \emph{Fault Tolerance}: we assume an environment where failures happen, either machines can go down or communication failures occur. Given the computation intensive nature of machine learning problems, we want to be able to continue operating properly in the event of a failure, instead of starting the process all over again. - \item \emph{Programmability}: we want to provide a clean interface that can be easily used by programmers. With few lines of code, they should be able to have a fault-tolerant AllReduce implementation. - \item \emph{Re-usability}: we want to build a library based on a few low-level primitives, e.g. AllReduce and Broadcast operations. Higher level abstractions, e.g. Recover operation, should reuse those basic building blocks. 
- \item \emph{Communication Efficiency}: closely related to the \emph{Scalability} goal. We want to send as few control messages as possible. We also want to reuse existent connections in order to avoid starting overheads. - \item \emph{Footprint}: we want to have a low memory footprint while running as well as provide a lightweight footprint library. -\end{enumerate} - -\subsection{Proposed Solution} - -\todo{what we did} - - -\subsubsection{Interface} - -\todo{API, how to use it} - -\section{Evaluation} - -\todo{add benchmarks and our results} - - -\section{Conclusion \& Future Work} - -With the exponential increase of data on the web, it becomes critical to build systems that can process information efficiently in order to extract value out of it. Several abstractions have been proposed to address those requirements. In this project, we focus on the AllReduce abstraction. We propose an efficient and fault tolerant version that can be used together with existent big data analytics systems, such as Spark, Hadoop, etc. -We compare our solution to MPI's AllReduce implementation, and show that the performance difference between the two is negligible considering our version is fault tolerant. -\todo{improve this} - -\subsection*{Acknowledgments} -Thanks to Arvind Krishnamurthy and the CSE550 teaching staff for their guidance and support during the quarter. - -\bibliography{rabit} -\bibliographystyle{abbrv} - -\end{document} - From f9d634ce06013459fd09b5508f380c47c762e5d0 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 4 Dec 2014 09:09:29 -0800 Subject: [PATCH 073/531] change notes --- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index 0aa5cf527..74f641695 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ Design Note ==== * Rabit is designed for algorithms that replicate same global model across nodes, while each node operating on local parition of data. 
* The global statistics collection is done by using Allreduce -* Currently, Rabit is not good at problems where model is distributed across nodes, other abstractions might suits the purpose (for example [parameter server](https://github.com/mli/parameter_server)) Design Goal ==== @@ -28,8 +27,3 @@ Features * MPI compatible - Codes using rabit interface naturally compiles with existing MPI compiler - User can fall back to use MPI Allreduce if they like with no code modification - -Persistence of Program -==== -Many complicated Machine learning algorithm involves things like temporal memory allocation, result caching. -It is good to have a program that persist over iterations and keeps the resources instead of re-allocate and re-compute the caching every time. Rabit allows the process to persist over all iterations. From 79e7862583eacdb31d4d8eba40545633197491a1 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 4 Dec 2014 09:09:56 -0800 Subject: [PATCH 074/531] change note --- README.md | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 74f641695..eac939317 100644 --- a/README.md +++ b/README.md @@ -4,17 +4,6 @@ rabit is a light weight library that provides a fault tolerant interface of Allr Contributors: https://github.com/tqchen/rabit/graphs/contributors -Design Note -==== -* Rabit is designed for algorithms that replicate same global model across nodes, while each node operating on local parition of data. 
-* The global statistics collection is done by using Allreduce - -Design Goal -==== -* rabit should run fast -* rabit is light weight -* rabit dig safe burrows to avoid disasters - Features ==== * Portable library @@ -27,3 +16,15 @@ Features * MPI compatible - Codes using rabit interface naturally compiles with existing MPI compiler - User can fall back to use MPI Allreduce if they like with no code modification + +Design Note +==== +* Rabit is designed for algorithms that replicate same global model across nodes, while each node operating on local parition of data. +* The global statistics collection is done by using Allreduce + +Design Goal +==== +* rabit should run fast +* rabit is light weight +* rabit dig safe burrows to avoid disasters + From cc410b8c90f035846ba957fd313f91b4a1f1fdba Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 4 Dec 2014 11:09:15 -0800 Subject: [PATCH 075/531] add local model in checkpoint interface, a new goal --- src/allreduce_base.cc | 2 +- src/allreduce_base.h | 35 ++++++++-- src/allreduce_robust.cc | 139 +++++++++++++++++++++++++++++++++++----- src/allreduce_robust.h | 76 +++++++++++++++++++--- src/engine.h | 24 +++++-- src/engine_mpi.cc | 6 +- src/rabit-inl.h | 2 +- 7 files changed, 248 insertions(+), 36 deletions(-) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 4c30b62d2..eba06a504 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -348,7 +348,7 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, } if (len != -1) { size_down_in += static_cast(len); - utils::Assert(size_down_in <= size_up_out, "Allreduce: boundary error"); + utils::Assert(size_down_in <= size_up_out, "Allreduce: boundary error"); } else { if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; } diff --git a/src/allreduce_base.h b/src/allreduce_base.h index cd9a5b0d0..29e05f8e5 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -84,23 +84,48 @@ class AllreduceBase : public IEngine { } /*! 
* \brief load latest check point - * \param p_model pointer to the model + * \param global_model pointer to the globally shared model/state + * when calling this function, the caller need to gauranttees that global_model + * is the same in all nodes + * \param local_model pointer to local model, that is specific to current node/rank + * this can be NULL when no local model is needed + * * \return the version number of check point loaded * if returned version == 0, this means no model has been CheckPointed * the p_model is not touched, user should do necessary initialization by themselves + * + * Common usage example: + * int iter = rabit::LoadCheckPoint(&model); + * if (iter == 0) model.InitParameters(); + * for (i = iter; i < max_iter; ++i) { + * do many things, include allreduce + * rabit::CheckPoint(model); + * } + * * \sa CheckPoint, VersionNumber */ - virtual int LoadCheckPoint(utils::ISerializable *p_model) { + virtual int LoadCheckPoint(utils::ISerializable *global_model, + utils::ISerializable *local_model = NULL) { return 0; } /*! * \brief checkpoint the model, meaning we finished a stage of execution * every time we call check point, there is a version number which will increase by one * - * \param p_model pointer to the model + * \param global_model pointer to the globally shared model/state + * when calling this function, the caller need to gauranttees that global_model + * is the same in all nodes + * \param local_model pointer to local model, that is specific to current node/rank + * this can be NULL when no local state is needed + * + * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will + * bring replication cost in CheckPoint function. global_model do not need explicit replication. 
+ * So only CheckPoint with global_model if possible + * * \sa LoadCheckPoint, VersionNumber */ - virtual void CheckPoint(const utils::ISerializable &model) { + virtual void CheckPoint(const utils::ISerializable *global_model, + const utils::ISerializable *local_model = NULL) { version_number += 1; } /*! @@ -267,6 +292,8 @@ class AllreduceBase : public IEngine { int parent_rank; // sockets of all links std::vector links; + // pointer to someplace in the ring + LinkRecord *ring_prev, *ring_next; //----- meta information----- // unique identifier of the possible job this process is doing // used to assign ranks, optional, default to NULL diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 6aba63e82..a878f5618 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -17,6 +17,7 @@ namespace rabit { namespace engine { AllreduceRobust::AllreduceRobust(void) { result_buffer_round = 1; + num_local_replica = 2; seq_counter = 0; } /*! \brief shutdown the engine */ @@ -108,22 +109,38 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) } /*! 
* \brief load latest check point - * \param p_model pointer to the model + * \param global_model pointer to the globally shared model/state + * when calling this function, the caller need to gauranttees that global_model + * is the same in all nodes + * \param local_model pointer to local model, that is specific to current node/rank + * this can be NULL when no local model is needed + * * \return the version number of check point loaded * if returned version == 0, this means no model has been CheckPointed * the p_model is not touched, user should do necessary initialization by themselves + * + * Common usage example: + * int iter = rabit::LoadCheckPoint(&model); + * if (iter == 0) model.InitParameters(); + * for (i = iter; i < max_iter; ++i) { + * do many things, include allreduce + * rabit::CheckPoint(model); + * } + * * \sa CheckPoint, VersionNumber */ -int AllreduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { +int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, + utils::ISerializable *local_model) { + utils::Check(local_model == NULL, "CheckPoint local_model is not yet supported"); // check if we succesfll if (RecoverExec(NULL, 0, ActionSummary::kLoadCheck, ActionSummary::kMaxSeq)) { // reset result buffer resbuf.Clear(); seq_counter = 0; // load from buffer - utils::MemoryBufferStream fs(&checked_model); + utils::MemoryBufferStream fs(&mglobal_model); fs.Read(&version_number, sizeof(version_number)); if (version_number == 0) return version_number; - p_model->Load(fs); + global_model->Load(fs); // run another phase of check ack, if recovered from data utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kMaxSeq), "check ack must return true"); @@ -139,20 +156,31 @@ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *p_model) { * \brief checkpoint the model, meaning we finished a stage of execution * every time we call check point, there is a version number which will increase by one * - * \param p_model 
pointer to the model + * \param global_model pointer to the globally shared model/state + * when calling this function, the caller need to gauranttees that global_model + * is the same in all nodes + * \param local_model pointer to local model, that is specific to current node/rank + * this can be NULL when no local state is needed + * + * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will + * bring replication cost in CheckPoint function. global_model do not need explicit replication. + * So only CheckPoint with global_model if possible + * * \sa LoadCheckPoint, VersionNumber */ -void AllreduceRobust::CheckPoint(const utils::ISerializable &model) { - // increase version number - version_number += 1; - // save model - checked_model.resize(0); - utils::MemoryBufferStream fs(&checked_model); - fs.Write(&version_number, sizeof(version_number)); - model.Save(fs); +void AllreduceRobust::CheckPoint(const utils::ISerializable *global_model, + const utils::ISerializable *local_model) { + utils::Assert(local_model == NULL, "CheckPoint local model is not supported yet"); // execute checkpoint, note: when checkpoint existing, load will not happen utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kMaxSeq), "check point must return true"); + // increase version number + version_number += 1; + // save model + mglobal_model.resize(0); + utils::MemoryBufferStream fs(&mglobal_model); + fs.Write(&version_number, sizeof(version_number)); + global_model->Save(fs); // reset result buffer resbuf.Clear(); seq_counter = 0; // execute check ack step, load happens here @@ -488,6 +516,10 @@ AllreduceRobust::TryRecoverData(RecoverType role, } if (finished) break; selecter.Select(); + // exception handling + for (int i = 0; i < nlink; ++i) { + if (selecter.CheckExcept(links[i].sock)) return kGetExcept; + } if (role == kRequestData) { const int pid = recv_link; if (selecter.CheckRead(links[pid].sock)) { @@ -548,16 +580,16 @@ 
AllreduceRobust::TryRecoverData(RecoverType role, */ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) { RecoverType role = requester ? kRequestData : kHaveData; - size_t size = this->checked_model.length(); + size_t size = this->mglobal_model.length(); int recv_link; std::vector req_in; ReturnType succ = TryDecideRouting(role, &size, &recv_link, &req_in); if (succ != kSuccess) return succ; if (role == kRequestData) { - checked_model.resize(size); + mglobal_model.resize(size); } if (size == 0) return kSuccess; - return TryRecoverData(role, &checked_model[0], size, recv_link, req_in); + return TryRecoverData(role, &mglobal_model[0], size, recv_link, req_in); } /*! * \brief try to get the result of operation specified by seqno @@ -674,6 +706,81 @@ bool AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { utils::Assert(false, "RecoverExec: should not reach here"); return true; } + +/*! + * \brief perform a ring passing to receive data from prev link, and sent data to next link + * this allows data to stream over a ring structure + * sendrecvbuf[0:read_ptr] are already provided by current node + * current node will recv sendrecvbuf[read_ptr:read_end] from prev link + * current node will send sendrecvbuf[write_ptr:write_end] to next link + * write_ptr will wait till the data is readed before sending the data + * this function requires read_end >= write_end + * + * \param sendrecvbuf_ the place to hold the incoming and outgoing data + * \param read_ptr the initial read pointer + * \param read_end the ending position to read + * \param write_ptr the initial write pointer + * \param write_end the ending position to write + * \param prev pointer to link to previous position in ring + * \param prev pointer to link of next position in ring + */ +AllreduceRobust::ReturnType +AllreduceRobust::RingPassing(void *sendrecvbuf_, + size_t read_ptr, + size_t read_end, + size_t write_ptr, + size_t write_end, + LinkRecord *prev_link, + 
LinkRecord *next_link) { + if (links.size() == 0 || read_end == 0) return kSuccess; + utils::Assert(read_end <= write_end, "boundary check"); + utils::Assert(read_ptr <= read_end, "boundary check"); + utils::Assert(write_ptr <= write_end, "boundary check"); + // take reference + LinkRecord &prev = *prev_link, &next = *next_link; + // send recv buffer + char *buf = reinterpret_cast(sendrecvbuf_); + while (true) { + bool finished = true; + utils::SelectHelper selecter; + if (read_ptr != read_end) { + selecter.WatchRead(prev.sock); + finished = false; + } + if (write_ptr < read_ptr && write_ptr != write_end) { + selecter.WatchWrite(next.sock); + finished = false; + } + selecter.WatchException(prev.sock); + selecter.WatchException(next.sock); + if (finished) break; + selecter.Select(); + if (selecter.CheckExcept(prev.sock)) return kGetExcept; + if (selecter.CheckExcept(next.sock)) return kGetExcept; + if (read_ptr != read_end && selecter.CheckRead(prev.sock)) { + ssize_t len = prev.sock.Recv(buf + read_ptr, read_end - read_ptr); + if (len == 0) { + prev.sock.Close(); return kSockError; + } + if (len != -1) { + read_ptr += static_cast(len); + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + } + } + if (write_ptr != write_end && write_ptr < read_ptr && + selecter.CheckWrite(next.sock)) { + size_t nsend = std::min(write_end - write_ptr, read_ptr - write_ptr); + ssize_t len = next.sock.Send(buf + write_ptr, nsend); + if (len != -1) { + write_ptr += static_cast(len); + } else { + if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; + } + } + } + return kSuccess; +} } // namespace engine } // namespace rabit diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index ad660da94..d1018907c 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -49,21 +49,46 @@ class AllreduceRobust : public AllreduceBase { virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root); /*! 
* \brief load latest check point - * \param p_model pointer to the model + * \param global_model pointer to the globally shared model/state + * when calling this function, the caller need to gauranttees that global_model + * is the same in all nodes + * \param local_model pointer to local model, that is specific to current node/rank + * this can be NULL when no local model is needed + * * \return the version number of check point loaded * if returned version == 0, this means no model has been CheckPointed * the p_model is not touched, user should do necessary initialization by themselves + * + * Common usage example: + * int iter = rabit::LoadCheckPoint(&model); + * if (iter == 0) model.InitParameters(); + * for (i = iter; i < max_iter; ++i) { + * do many things, include allreduce + * rabit::CheckPoint(model); + * } + * * \sa CheckPoint, VersionNumber */ - virtual int LoadCheckPoint(utils::ISerializable *p_model); + virtual int LoadCheckPoint(utils::ISerializable *global_model, + utils::ISerializable *local_model = NULL); /*! * \brief checkpoint the model, meaning we finished a stage of execution * every time we call check point, there is a version number which will increase by one * - * \param p_model pointer to the model + * \param global_model pointer to the globally shared model/state + * when calling this function, the caller need to gauranttees that global_model + * is the same in all nodes + * \param local_model pointer to local model, that is specific to current node/rank + * this can be NULL when no local state is needed + * + * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will + * bring replication cost in CheckPoint function. global_model do not need explicit replication. 
+ * So only CheckPoint with global_model if possible + * * \sa LoadCheckPoint, VersionNumber */ - virtual void CheckPoint(const utils::ISerializable &model); + virtual void CheckPoint(const utils::ISerializable *global_model, + const utils::ISerializable *local_model = NULL); /*! * \brief explicitly re-init everything before calling LoadCheckPoint * call this function when IEngine throw an exception out, @@ -259,7 +284,7 @@ class AllreduceRobust : public AllreduceBase { * result by recovering procedure, the action is complete, no further action is needed * - false means this is the lastest action that has not yet been executed, need to execute the action */ - bool RecoverExec(void *buf, size_t size, int flag, int seqno = ActionSummary::kMaxSeq); + bool RecoverExec(void *buf, size_t size, int flag, int seqno = ActionSummary::kMaxSeq); /*! * \brief try to load check point * @@ -325,6 +350,30 @@ class AllreduceRobust : public AllreduceBase { size_t size, int recv_link, const std::vector &req_in); + /*! 
+ * \brief perform a ring passing to receive data from prev link, and sent data to next link + * this allows data to stream over a ring structure + * sendrecvbuf[0:read_ptr] are already provided by current node + * current node will recv sendrecvbuf[read_ptr:read_end] from prev link + * current node will send sendrecvbuf[write_ptr:write_end] to next link + * write_ptr will wait till the data is readed before sending the data + * this function requires read_end >= write_end + * + * \param sendrecvbuf_ the place to hold the incoming and outgoing data + * \param read_ptr the initial read pointer + * \param read_end the ending position to read + * \param write_ptr the initial write pointer + * \param write_end the ending position to write + * \param prev pointer to link to previous position in ring + * \param prev pointer to link of next position in ring + */ + ReturnType RingPassing(void *senrecvbuf_, + size_t read_ptr, + size_t read_end, + size_t write_ptr, + size_t write_end, + LinkRecord *prev_link, + LinkRecord *next_link); /*! 
* \brief run message passing algorithm on the allreduce tree * the result is edge message stored in p_edge_in and p_edge_out @@ -358,10 +407,21 @@ class AllreduceRobust : public AllreduceBase { int seq_counter; // the round of result buffer, used to mode the result int result_buffer_round; - // result buffer + // result buffer of all reduce ResultBuffer resbuf; - // last check point model - std::string checked_model; + // last check point global model + std::string mglobal_model; + // number of replica for local state/model + int num_local_replica; + // pointer to memory position in the local model + // local model is stored in CSR format(like a sparse matrices) + // local_model[rptr[0]:rptr[1]] stores the model of current node + // local_model[rptr[k]:rptr[k+1]] stores the model of node in previous k hops in the ring + std::vector local_rptr; + // storage for local model replicas + std::string mlocal_model; + // temporal storage + std::string tmp_local_model; }; } // namespace engine } // namespace rabit diff --git a/src/engine.h b/src/engine.h index 6d95fe5dc..e393e94db 100644 --- a/src/engine.h +++ b/src/engine.h @@ -60,7 +60,12 @@ class IEngine { virtual void InitAfterException(void) = 0; /*! 
* \brief load latest check point - * \param p_model pointer to the model + * \param global_model pointer to the globally shared model/state + * when calling this function, the caller need to gauranttees that global_model + * is the same in all nodes + * \param local_model pointer to local model, that is specific to current node/rank + * this can be NULL when no local model is needed + * * \return the version number of check point loaded * if returned version == 0, this means no model has been CheckPointed * the p_model is not touched, user should do necessary initialization by themselves @@ -75,15 +80,26 @@ class IEngine { * * \sa CheckPoint, VersionNumber */ - virtual int LoadCheckPoint(utils::ISerializable *p_model) = 0; + virtual int LoadCheckPoint(utils::ISerializable *global_model, + utils::ISerializable *local_model = NULL) = 0; /*! * \brief checkpoint the model, meaning we finished a stage of execution * every time we call check point, there is a version number which will increase by one * - * \param p_model pointer to the model + * \param global_model pointer to the globally shared model/state + * when calling this function, the caller need to gauranttees that global_model + * is the same in all nodes + * \param local_model pointer to local model, that is specific to current node/rank + * this can be NULL when no local state is needed + * + * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will + * bring replication cost in CheckPoint function. global_model do not need explicit replication. + * So only CheckPoint with global_model if possible + * * \sa LoadCheckPoint, VersionNumber */ - virtual void CheckPoint(const utils::ISerializable &model) = 0; + virtual void CheckPoint(const utils::ISerializable *global_model, + const utils::ISerializable *local_model = NULL) = 0; /*! 
* \return version number of current stored model, * which means how many calls to CheckPoint we made so far diff --git a/src/engine_mpi.cc b/src/engine_mpi.cc index 03bd0cb73..f32dba854 100644 --- a/src/engine_mpi.cc +++ b/src/engine_mpi.cc @@ -32,10 +32,12 @@ class MPIEngine : public IEngine { virtual void InitAfterException(void) { utils::Error("MPI is not fault tolerant"); } - virtual int LoadCheckPoint(utils::ISerializable *p_model) { + virtual int LoadCheckPoint(utils::ISerializable *global_model, + utils::ISerializable *local_model = NULL) { return 0; } - virtual void CheckPoint(const utils::ISerializable &model) { + virtual void CheckPoint(const utils::ISerializable *global_model, + const utils::ISerializable *local_model = NULL) { version_number += 1; } virtual int VersionNumber(void) const { diff --git a/src/rabit-inl.h b/src/rabit-inl.h index f3fd39b2a..b13ea88fc 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -129,7 +129,7 @@ inline int LoadCheckPoint(utils::ISerializable *p_model) { } // checkpoint the model, meaning we finished a stage of execution inline void CheckPoint(const utils::ISerializable &model) { - engine::GetEngine()->CheckPoint(model); + engine::GetEngine()->CheckPoint(&model); } // return the version number of currently stored model inline int VersionNumber(void) { From 821eb21ae28a5f1c4acb6dad8646768b31652138 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 4 Dec 2014 17:30:58 -0800 Subject: [PATCH 076/531] before make rabit public --- README.md | 3 +-- src/allreduce_robust.cc | 15 ++++++++------- src/allreduce_robust.h | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index eac939317..a7b333eb8 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. 
It is designed to support easy implementation of distributed machine learning programs, many of which sits naturally under Allreduce abstraction. -Contributors: https://github.com/tqchen/rabit/graphs/contributors +Interface: [rabit.h](src/rabit.h) Features ==== @@ -27,4 +27,3 @@ Design Goal * rabit should run fast * rabit is light weight * rabit dig safe burrows to avoid disasters - diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index a878f5618..3232ef40c 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -137,7 +137,7 @@ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, // reset result buffer resbuf.Clear(); seq_counter = 0; // load from buffer - utils::MemoryBufferStream fs(&mglobal_model); + utils::MemoryBufferStream fs(&global_checkpoint); fs.Read(&version_number, sizeof(version_number)); if (version_number == 0) return version_number; global_model->Load(fs); @@ -155,7 +155,7 @@ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, /*! 
* \brief checkpoint the model, meaning we finished a stage of execution * every time we call check point, there is a version number which will increase by one - * + * * \param global_model pointer to the globally shared model/state * when calling this function, the caller need to gauranttees that global_model * is the same in all nodes @@ -174,11 +174,12 @@ void AllreduceRobust::CheckPoint(const utils::ISerializable *global_model, // execute checkpoint, note: when checkpoint existing, load will not happen utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kMaxSeq), "check point must return true"); + // this is the critical region where we will change all the stored models // increase version number version_number += 1; // save model - mglobal_model.resize(0); - utils::MemoryBufferStream fs(&mglobal_model); + global_checkpoint.resize(0); + utils::MemoryBufferStream fs(&global_checkpoint); fs.Write(&version_number, sizeof(version_number)); global_model->Save(fs); // reset result buffer @@ -580,16 +581,16 @@ AllreduceRobust::TryRecoverData(RecoverType role, */ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) { RecoverType role = requester ? kRequestData : kHaveData; - size_t size = this->mglobal_model.length(); + size_t size = this->global_checkpoint.length(); int recv_link; std::vector req_in; ReturnType succ = TryDecideRouting(role, &size, &recv_link, &req_in); if (succ != kSuccess) return succ; if (role == kRequestData) { - mglobal_model.resize(size); + global_checkpoint.resize(size); } if (size == 0) return kSuccess; - return TryRecoverData(role, &mglobal_model[0], size, recv_link, req_in); + return TryRecoverData(role, &global_checkpoint[0], size, recv_link, req_in); } /*! 
* \brief try to get the result of operation specified by seqno diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index d1018907c..45820a017 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -349,7 +349,7 @@ class AllreduceRobust : public AllreduceBase { void *sendrecvbuf_, size_t size, int recv_link, - const std::vector &req_in); + const std::vector &req_in); /*! * \brief perform a ring passing to receive data from prev link, and sent data to next link * this allows data to stream over a ring structure @@ -410,7 +410,7 @@ class AllreduceRobust : public AllreduceBase { // result buffer of all reduce ResultBuffer resbuf; // last check point global model - std::string mglobal_model; + std::string global_checkpoint; // number of replica for local state/model int num_local_replica; // pointer to memory position in the local model @@ -419,9 +419,9 @@ class AllreduceRobust : public AllreduceBase { // local_model[rptr[k]:rptr[k+1]] stores the model of node in previous k hops in the ring std::vector local_rptr; // storage for local model replicas - std::string mlocal_model; - // temporal storage - std::string tmp_local_model; + std::string local_checkpoint; + // temporal storage for doing local checkpointing + std::string tmp_local_check; }; } // namespace engine } // namespace rabit From 7cd5474f1a358cc3279c6ba7de7a591c23d6e302 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 4 Dec 2014 17:31:40 -0800 Subject: [PATCH 077/531] chg interface --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a7b333eb8..2f1f58478 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementation of distributed machine learning programs, many of which sits naturally under Allreduce abstraction. 
-Interface: [rabit.h](src/rabit.h) +* See the [package interface file](src/rabit.h) Features ==== From 1af3e81ada1700fd0ea2d67178c5cbd16f006d04 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 4 Dec 2014 17:32:22 -0800 Subject: [PATCH 078/531] chg robust to reliable --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f1f58478..c8374cb61 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## rabit: Robust Allreduce and Broadcast Interface +## rabit: Reliable Allreduce and Broadcast Interface rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementation of distributed machine learning programs, many of which sits naturally under Allreduce abstraction. From e9a3f5169ee5e6f6825880c7e41decef62ae854c Mon Sep 17 00:00:00 2001 From: nachocano Date: Thu, 4 Dec 2014 18:02:07 -0800 Subject: [PATCH 079/531] cosmetic changes --- README.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index c8374cb61..2d9f1caab 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,29 @@ ## rabit: Reliable Allreduce and Broadcast Interface -rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementation of distributed machine learning programs, many of which sits naturally under Allreduce abstraction. +rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementations of distributed machine learning programs, many of which fall naturally under the Allreduce abstraction. * See the [package interface file](src/rabit.h) Features ==== * Portable library - - Rabit is a library instead of framework, program only need to link the library to run, without restricting to a single framework. 
+ - Rabit is a library instead of a framework, a program only needs to link the library to run, without restricting to a single framework. * Flexibility in programming - - Programs call rabit functions in any sequence, as opposed to defines limited functions and being called. - - Program persist over all the iterations, unless it fails and recover + - Programs can call rabit functions in any sequence, as opposed to frameworks where callbacks are being offered and are called by the framework, i.e. inversion of control principle. + - Programs persist over all the iterations, unless they fail and recover * Fault tolerance - - Rabit program can recover model and results of syncrhonization functions calls + - Rabit programs can recover the model and results from synchronized function calls * MPI compatible - - Codes using rabit interface naturally compiles with existing MPI compiler - - User can fall back to use MPI Allreduce if they like with no code modification + - Code that uses the rabit interface also compiles with existing MPI compilers + - Users can use MPI Allreduce with no code modification -Design Note +Design Notes ==== -* Rabit is designed for algorithms that replicate same global model across nodes, while each node operating on local parition of data. -* The global statistics collection is done by using Allreduce +* Rabit is designed for algorithms that replicate the same global model across nodes, while each node operates on a local partition of the data. 
+* The collection of global statistics is done using Allreduce -Design Goal +Design Goals ==== * rabit should run fast -* rabit is light weight -* rabit dig safe burrows to avoid disasters +* rabit should be light weight +* rabit should safely dig burrows to avoid disasters From e00fb99e7b0e491572f146c110ab5f6e7a416d46 Mon Sep 17 00:00:00 2001 From: nachocano Date: Thu, 4 Dec 2014 19:02:11 -0800 Subject: [PATCH 080/531] cosmetic --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 2d9f1caab..995f7b274 100644 --- a/README.md +++ b/README.md @@ -7,12 +7,12 @@ rabit is a light weight library that provides a fault tolerant interface of Allr Features ==== * Portable library - - Rabit is a library instead of a framework, a program only needs to link the library to run, without restricting to a single framework. + - Rabit is a library instead of a framework, a program only needs to link the library to run. * Flexibility in programming - - Programs can call rabit functions in any sequence, as opposed to frameworks where callbacks are being offered and are called by the framework, i.e. inversion of control principle. - - Programs persist over all the iterations, unless they fail and recover + - Programs can call rabit functions in any order, as opposed to frameworks where callbacks are offered and called by the framework, i.e. inversion of control principle. + - Programs persist over all the iterations, unless they fail and recover. * Fault tolerance - - Rabit programs can recover the model and results from synchronized function calls + - Rabit programs can recover the model and results using synchronous function calls. 
* MPI compatible - Code that uses the rabit interface also compiles with existing MPI compilers - Users can use MPI Allreduce with no code modification From bb7d6814a7e020f062f474967080179e4ba787ad Mon Sep 17 00:00:00 2001 From: nachocano Date: Fri, 5 Dec 2014 03:27:02 -0800 Subject: [PATCH 081/531] creating initial version of hadoop submit script. Not working. Not sure how to get the master uri and port. I believe I cannot do it before I launch the job. Updating the name from submit_job to submit_job_mpi --- submit_job_hadoop.py | 39 +++++++++++++++++++++++++++++++++++++++ test/test.sh | 2 +- 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100755 submit_job_hadoop.py diff --git a/submit_job_hadoop.py b/submit_job_hadoop.py new file mode 100755 index 000000000..af061360c --- /dev/null +++ b/submit_job_hadoop.py @@ -0,0 +1,39 @@ +#!/usr/bin/python +""" +This is an example job submit script for hadoop streaming +""" +import argparse +import sys +import os +import subprocess +sys.path.append('./src/') +from rabit_master import Master +from threading import Thread + + +def hadoop_streaming(nslaves, slave_args): + cmd = '%s jar %s -input %s -output %s -mapper %s -reducer /bin/cat stdin %d %d stdout' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations) + print cmd + subprocess.check_call(cmd, shell = True) + + +parser = argparse.ArgumentParser(description='Hadoop Streaming submission script') +parser.add_argument('-s', '--nslaves', required=True, type=int) +parser.add_argument('-hb', '--hadoop_binary', required=True) +parser.add_argument('-hs', '--hadoop_streaming_jar', required=True) +parser.add_argument('-i', '--input', required=True) +parser.add_argument('-o', '--output', required=True) +parser.add_argument('-m', '--mapper', required=True) +#parser.add_argument('-r', '--reducer', required=False) +parser.add_argument('-k', '--nclusters', required=True, type=int) 
+parser.add_argument('-itr', '--iterations', required=True, type=int) +args = parser.parse_args() + +master = Master() +# this is availabe after triggered the hadoop streaming job, not sure how to do it +# os.environ["mapred_job_id"] +slave_args = ['master_uri=%s' % 'TODO', 'master_port=%s' % 'TODO'] +submit_thread = Thread(target = hadoop_streaming, args = slave_args) +submit_thread.start() +master.accept_slaves(args.nslaves) +submit_thread.join() diff --git a/test/test.sh b/test/test.sh index 78d267157..c323785dd 100755 --- a/test/test.sh +++ b/test/test.sh @@ -5,4 +5,4 @@ then exit -1 fi -../submit_job_tcp.py $1 test_recover "${@:2}" +../submit_job.py $1 test_recover "${@:2}" From f9e95ab52231493e99fd5c71d334e5b8a4eb852e Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Fri, 5 Dec 2014 09:09:20 -0800 Subject: [PATCH 082/531] Update submit_job_hadoop.py --- submit_job_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submit_job_hadoop.py b/submit_job_hadoop.py index af061360c..4990b62da 100755 --- a/submit_job_hadoop.py +++ b/submit_job_hadoop.py @@ -12,7 +12,7 @@ from threading import Thread def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -input %s -output %s -mapper %s -reducer /bin/cat stdin %d %d stdout' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations) + cmd = '%s jar %s -input %s -output %s -mapper \"%s %s\" -reducer /bin/cat stdin %d %d stdout' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, slave_args, args.nclusters, args.iterations) print cmd subprocess.check_call(cmd, shell = True) From e05098cacbb8b8a3c0db588ceecba856eda5c56f Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Fri, 5 Dec 2014 09:10:26 -0800 Subject: [PATCH 083/531] Update submit_job_hadoop.py --- submit_job_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submit_job_hadoop.py b/submit_job_hadoop.py index 
4990b62da..eef480fa2 100755 --- a/submit_job_hadoop.py +++ b/submit_job_hadoop.py @@ -12,7 +12,7 @@ from threading import Thread def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -input %s -output %s -mapper \"%s %s\" -reducer /bin/cat stdin %d %d stdout' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, slave_args, args.nclusters, args.iterations) + cmd = '%s jar %s -input %s -output %s -mapper \"%s %s\" -reducer /bin/cat stdin %d %d stdout' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, ' '.join(slave_args), args.nclusters, args.iterations) print cmd subprocess.check_call(cmd, shell = True) From e7a22792acb794aeda7fa905ba3a3dbc29350bea Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Fri, 5 Dec 2014 09:14:44 -0800 Subject: [PATCH 084/531] Update submit_job_hadoop.py --- submit_job_hadoop.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/submit_job_hadoop.py b/submit_job_hadoop.py index eef480fa2..fbb83a79f 100755 --- a/submit_job_hadoop.py +++ b/submit_job_hadoop.py @@ -7,15 +7,7 @@ import sys import os import subprocess sys.path.append('./src/') -from rabit_master import Master -from threading import Thread - - -def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -input %s -output %s -mapper \"%s %s\" -reducer /bin/cat stdin %d %d stdout' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, ' '.join(slave_args), args.nclusters, args.iterations) - print cmd - subprocess.check_call(cmd, shell = True) - +import rabit_master as master parser = argparse.ArgumentParser(description='Hadoop Streaming submission script') parser.add_argument('-s', '--nslaves', required=True, type=int) @@ -29,11 +21,9 @@ parser.add_argument('-k', '--nclusters', required=True, type=int) parser.add_argument('-itr', '--iterations', required=True, type=int) args = parser.parse_args() -master = Master() -# this is 
availabe after triggered the hadoop streaming job, not sure how to do it -# os.environ["mapred_job_id"] -slave_args = ['master_uri=%s' % 'TODO', 'master_port=%s' % 'TODO'] -submit_thread = Thread(target = hadoop_streaming, args = slave_args) -submit_thread.start() -master.accept_slaves(args.nslaves) -submit_thread.join() +def hadoop_streaming(nslaves, slave_args): + cmd = '%s jar %s -input %s -output %s -mapper \"%s %s\" -reducer /bin/cat stdin %d %d stdout' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, ' '.join(slave_args), args.nclusters, args.iterations) + print cmd + subprocess.check_call(cmd, shell = True) + +master.submit(args.nslaves, [], fun_submit= hadoop_streaming) From ab278513abd73f55ea8f27d9659a171ab4b3e725 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 5 Dec 2014 09:39:51 -0800 Subject: [PATCH 085/531] ok --- src/allreduce_base.cc | 23 +++++++++++++++++++---- src/allreduce_base.h | 4 +++- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index eba06a504..6eb77b6dd 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -7,6 +7,7 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX +#include #include #include "./allreduce_base.h" @@ -21,13 +22,26 @@ AllreduceBase::AllreduceBase(void) { nport_trial = 1000; rank = -1; world_size = 1; + hadoop_mode = 0; version_number = 0; - job_id = "NULL"; + task_id = "NULL"; this->SetParam("reduce_buffer", "256MB"); } // initialization function void AllreduceBase::Init(void) { + { + // handling for hadoop + const char *task_id = getenv("mapred_task_id"); + if (hadoop_mode != 0) { + utils::Check(task_id != NULL, "hadoop_mode is set but cannot find mapred_task_id"); + } + if (task_id != NULL) { + this->SetParam("task_id", task_id); + this->SetParam("hadoop_mode", "1"); + } + } + // start socket utils::Socket::Startup(); utils::Assert(links.size() == 0, "can only call Init 
once"); this->host_uri = utils::SockAddr::GetHostName(); @@ -54,7 +68,7 @@ void AllreduceBase::Shutdown(void) { utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); utils::Assert(master.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); - master.SendStr(job_id); + master.SendStr(task_id); master.SendStr(std::string("shutdown")); master.Close(); utils::TCPSocket::Finalize(); @@ -67,7 +81,8 @@ void AllreduceBase::Shutdown(void) { void AllreduceBase::SetParam(const char *name, const char *val) { if (!strcmp(name, "master_uri")) master_uri = val; if (!strcmp(name, "master_port")) master_port = atoi(val); - if (!strcmp(name, "job_id")) job_id = val; + if (!strcmp(name, "task_id")) task_id = val; + if (!strcmp(name, "hadoop_mode")) hadoop_mode = atoi(val); if (!strcmp(name, "reduce_buffer")) { char unit; unsigned long amount; @@ -104,7 +119,7 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); utils::Assert(master.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); - master.SendStr(job_id); + master.SendStr(task_id); master.SendStr(std::string(cmd)); {// get new ranks int newrank; diff --git a/src/allreduce_base.h b/src/allreduce_base.h index 29e05f8e5..8ebd3ad3a 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -285,6 +285,8 @@ class AllreduceBase : public IEngine { ReturnType TryBroadcast(void *sendrecvbuf_, size_t size, int root); //---- data structure related to model ---- int version_number; + // whether the job is running in hadoop + int hadoop_mode; //---- local data related to link ---- // index of parent link, can be -1, meaning this is root of the tree int parent_index; @@ -297,7 +299,7 @@ class AllreduceBase : public IEngine { //----- meta information----- // unique identifier of the possible job this 
process is doing // used to assign ranks, optional, default to NULL - std::string job_id; + std::string task_id; // uri of current host, to be set by Init std::string host_uri; // uri of master From 7765e2dc553a2432244272b2bc125769218d92dc Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 5 Dec 2014 09:49:26 -0800 Subject: [PATCH 086/531] add status report --- src/allreduce_base.h | 14 ++++++++++++++ src/allreduce_robust.cc | 1 + src/allreduce_robust.h | 3 --- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/allreduce_base.h b/src/allreduce_base.h index 8ebd3ad3a..e972551f8 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -144,6 +144,16 @@ class AllreduceBase : public IEngine { virtual void InitAfterException(void) { utils::Error("InitAfterException: not implemented"); } + /*! + * \brief report current status to the job tracker + * depending on the job tracker we are in + */ + inline void ReportStatus(void) const { + if (hadoop_mode != 0) { + fprintf(stderr, "reporter:status:Rabit Phase[%03d] Operation %03d\n", + version_number, seq_counter); + } + } protected: /*! 
\brief enumeration of possible returning results from Try functions */ @@ -284,6 +294,10 @@ class AllreduceBase : public IEngine { */ ReturnType TryBroadcast(void *sendrecvbuf_, size_t size, int root); //---- data structure related to model ---- + // call sequence counter, records how many calls we made so far + // from last call to CheckPoint, LoadCheckPoint + int seq_counter; + // version number of model int version_number; // whether the job is running in hadoop int hadoop_mode; diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 3232ef40c..0f30ae1aa 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -645,6 +645,7 @@ bool AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { // request ActionSummary req(flag, seqno); while (true) { + this->ReportStatus(); // action ActionSummary act = req; // get the reduced action diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index 45820a017..2f183ef13 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -402,9 +402,6 @@ class AllreduceRobust : public AllreduceBase { size_t out_index) ); //---- recovery data structure ---- - // call sequence counter, records how many calls we made so far - // from last call to CheckPoint, LoadCheckPoint - int seq_counter; // the round of result buffer, used to mode the result int result_buffer_round; // result buffer of all reduce From d9c22e54deb125dc5d38dd0b64906d72491f0ddc Mon Sep 17 00:00:00 2001 From: nachocano Date: Fri, 5 Dec 2014 13:28:42 -0800 Subject: [PATCH 087/531] closer, but still does not work... stays in map 100%. 
I think an exception is being thrown --- submit_job_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submit_job_hadoop.py b/submit_job_hadoop.py index fbb83a79f..d7ef17ff5 100755 --- a/submit_job_hadoop.py +++ b/submit_job_hadoop.py @@ -22,7 +22,7 @@ parser.add_argument('-itr', '--iterations', required=True, type=int) args = parser.parse_args() def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -input %s -output %s -mapper \"%s %s\" -reducer /bin/cat stdin %d %d stdout' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, ' '.join(slave_args), args.nclusters, args.iterations) + cmd = '%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer /bin/cat -file %s' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper) print cmd subprocess.check_call(cmd, shell = True) From 54eb5623cb53aad89a9f34f024b4e690a6315d6b Mon Sep 17 00:00:00 2001 From: nachocano Date: Fri, 5 Dec 2014 15:24:00 -0800 Subject: [PATCH 088/531] worked on my machine !!! 
finally --- submit_job_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submit_job_hadoop.py b/submit_job_hadoop.py index d7ef17ff5..0bd95644b 100755 --- a/submit_job_hadoop.py +++ b/submit_job_hadoop.py @@ -22,7 +22,7 @@ parser.add_argument('-itr', '--iterations', required=True, type=int) args = parser.parse_args() def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer /bin/cat -file %s' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper) + cmd = '%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer \"/bin/cat\" -file %s' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper) print cmd subprocess.check_call(cmd, shell = True) From 67b68ceae68fe9cee467afe105a6c428824043c1 Mon Sep 17 00:00:00 2001 From: nachocano Date: Fri, 5 Dec 2014 16:00:47 -0800 Subject: [PATCH 089/531] adding timing --- submit_job_hadoop.py | 5 ++++- toolkit/kmeans.cpp | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/submit_job_hadoop.py b/submit_job_hadoop.py index 0bd95644b..bf1bac752 100755 --- a/submit_job_hadoop.py +++ b/submit_job_hadoop.py @@ -5,6 +5,7 @@ This is an example job submit script for hadoop streaming import argparse import sys import os +import time import subprocess sys.path.append('./src/') import rabit_master as master @@ -25,5 +26,7 @@ def hadoop_streaming(nslaves, slave_args): cmd = '%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer \"/bin/cat\" -file %s' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper) print cmd subprocess.check_call(cmd, shell = True) - + +start = time.time() 
master.submit(args.nslaves, [], fun_submit= hadoop_streaming) +print 'All run took %s' % (time.time() - start) diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index 109b49826..674223cc6 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -3,6 +3,7 @@ #include #include #include "./toolkit_util.h" +#include using namespace rabit; @@ -85,6 +86,8 @@ int main(int argc, char *argv[]) { printf("Usage: num_cluster max_iter \n"); return 0; } + clock_t tStart = clock(); + srand(0); // load the data SparseMat data; @@ -140,6 +143,7 @@ int main(int argc, char *argv[]) { if (rabit::GetRank() == 0) { model.centroids.Print(argv[4]); } + utils::LogPrintf("[%d] Time taken: %f seconds\n", rabit::GetRank(), static_cast(clock() - tStart) / CLOCKS_PER_SEC); rabit::Finalize(); return 0; } From dc12958fc7172ff5e982be5cace8d108fd94f62d Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 6 Dec 2014 09:15:31 -0800 Subject: [PATCH 090/531] rename master to tracker, to emphasie rabit is p2p in computing --- src/{rabit_master.py => rabit_tracker.py} | 4 ++-- submit_job_hadoop.py => submit_hadoop.py | 4 ++-- submit_job.py => submit_mpi.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) rename src/{rabit_master.py => rabit_tracker.py} (99%) rename submit_job_hadoop.py => submit_hadoop.py (93%) rename submit_job.py => submit_mpi.py (91%) diff --git a/src/rabit_master.py b/src/rabit_tracker.py similarity index 99% rename from src/rabit_master.py rename to src/rabit_tracker.py index dbe303e39..ea77506be 100644 --- a/src/rabit_master.py +++ b/src/rabit_tracker.py @@ -112,7 +112,7 @@ class SlaveEntry: self.wait_accept = len(badset) - len(conset) return rmset -class Master: +class Tracker: def __init__(self, port = 9000, port_end = 9999): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) for port in range(port, port_end): @@ -172,7 +172,7 @@ def mpi_submit(nslave, args): return subprocess.check_call(cmd, shell = True) def submit(nslave, args, fun_submit = mpi_submit): - 
master = Master() + master = Tracker() submit_thread = Thread(target = fun_submit, args = (nslave, args + master.slave_args())) submit_thread.start() master.accept_slaves(nslave) diff --git a/submit_job_hadoop.py b/submit_hadoop.py similarity index 93% rename from submit_job_hadoop.py rename to submit_hadoop.py index bf1bac752..ae315f60a 100755 --- a/submit_job_hadoop.py +++ b/submit_hadoop.py @@ -8,7 +8,7 @@ import os import time import subprocess sys.path.append('./src/') -import rabit_master as master +import rabit_tracker as tracker parser = argparse.ArgumentParser(description='Hadoop Streaming submission script') parser.add_argument('-s', '--nslaves', required=True, type=int) @@ -28,5 +28,5 @@ def hadoop_streaming(nslaves, slave_args): subprocess.check_call(cmd, shell = True) start = time.time() -master.submit(args.nslaves, [], fun_submit= hadoop_streaming) +tracker.submit(args.nslaves, [], fun_submit= hadoop_streaming) print 'All run took %s' % (time.time() - start) diff --git a/submit_job.py b/submit_mpi.py similarity index 91% rename from submit_job.py rename to submit_mpi.py index 317ff6f43..14eafff8c 100755 --- a/submit_job.py +++ b/submit_mpi.py @@ -9,7 +9,7 @@ import subprocess # import the tcp_master.py # add path to sync sys.path.append(os.path.dirname(__file__)+'/src/') -import rabit_master as master +import rabit_tracker as tracker # # Note: this submit script is only used for example purpose @@ -33,4 +33,4 @@ if __name__ == '__main__': print 'Usage: ' exit(0) # call submit, with nslave, the commands to run each job and submit function - master.submit(int(sys.argv[1]), sys.argv[2:], fun_submit= mpi_submit) + tracker.submit(int(sys.argv[1]), sys.argv[2:], fun_submit= mpi_submit) From a569bf2698b3b2dd25265f9e4f42d366f214f8b3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 6 Dec 2014 09:19:08 -0800 Subject: [PATCH 091/531] change gitignore --- .gitignore | 5 ++++- toolkit/.gitignore | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 
100644 toolkit/.gitignore diff --git a/.gitignore b/.gitignore index f087cd689..3bd3d6a61 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,7 @@ *.app *~ *.pyc -test_allreduce \ No newline at end of file +*.mpi +*.exe +*.txt +*tmp* diff --git a/toolkit/.gitignore b/toolkit/.gitignore new file mode 100644 index 000000000..5f8241b66 --- /dev/null +++ b/toolkit/.gitignore @@ -0,0 +1,2 @@ +kmeans +*.mpi From 19631ecef6bc3e431d4ef25c80f4d016959fdfdf Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 6 Dec 2014 09:24:12 -0800 Subject: [PATCH 092/531] more tracker renaming --- src/allreduce_base.cc | 92 +++++++++++++++++++++---------------------- src/allreduce_base.h | 12 +++--- src/rabit_tracker.py | 10 +++-- test/test.sh | 8 ---- 4 files changed, 58 insertions(+), 64 deletions(-) delete mode 100755 test/test.sh diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 6eb77b6dd..90a32dbee 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -15,8 +15,8 @@ namespace rabit { namespace engine { // constructor AllreduceBase::AllreduceBase(void) { - master_uri = "NULL"; - master_port = 9000; + tracker_uri = "NULL"; + tracker_port = 9000; host_uri = ""; slave_port = 9010; nport_trial = 1000; @@ -45,7 +45,7 @@ void AllreduceBase::Init(void) { utils::Socket::Startup(); utils::Assert(links.size() == 0, "can only call Init once"); this->host_uri = utils::SockAddr::GetHostName(); - // get information from master + // get information from tracker this->ReConnectLinks(); } @@ -55,22 +55,22 @@ void AllreduceBase::Shutdown(void) { } links.clear(); - if (master_uri == "NULL") return; + if (tracker_uri == "NULL") return; int magic = kMagic; - // notify master rank i have shutdown - utils::TCPSocket master; - master.Create(); - if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) { - utils::Socket::Error("Connect Master"); + // notify tracker rank i have shutdown + utils::TCPSocket tracker; + tracker.Create(); + if 
(!tracker.Connect(utils::SockAddr(tracker_uri.c_str(), tracker_port))) { + utils::Socket::Error("Connect Tracker"); } - utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 1"); - utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); - utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); + utils::Assert(tracker.SendAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 1"); + utils::Assert(tracker.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); + utils::Check(magic == kMagic, "sync::Invalid tracker message, init failure"); - utils::Assert(master.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); - master.SendStr(task_id); - master.SendStr(std::string("shutdown")); - master.Close(); + utils::Assert(tracker.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); + tracker.SendStr(task_id); + tracker.SendStr(std::string("shutdown")); + tracker.Close(); utils::TCPSocket::Finalize(); } /*! @@ -79,8 +79,8 @@ void AllreduceBase::Shutdown(void) { * \param val parameter value */ void AllreduceBase::SetParam(const char *name, const char *val) { - if (!strcmp(name, "master_uri")) master_uri = val; - if (!strcmp(name, "master_port")) master_port = atoi(val); + if (!strcmp(name, "rabit_tracker_uri")) tracker_uri = val; + if (!strcmp(name, "rabit_tracker_port")) tracker_port = atoi(val); if (!strcmp(name, "task_id")) task_id = val; if (!strcmp(name, "hadoop_mode")) hadoop_mode = atoi(val); if (!strcmp(name, "reduce_buffer")) { @@ -100,34 +100,34 @@ void AllreduceBase::SetParam(const char *name, const char *val) { } } /*! 
- * \brief connect to the master to fix the the missing links + * \brief connect to the tracker to fix the the missing links * this function is also used when the engine start up */ void AllreduceBase::ReConnectLinks(const char *cmd) { // single node mode - if (master_uri == "NULL") { + if (tracker_uri == "NULL") { rank = 0; return; } int magic = kMagic; - // get information from master - utils::TCPSocket master; - master.Create(); - if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) { + // get information from tracker + utils::TCPSocket tracker; + tracker.Create(); + if (!tracker.Connect(utils::SockAddr(tracker_uri.c_str(), tracker_port))) { utils::Socket::Error("Connect"); } - utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 1"); - utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); - utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); - utils::Assert(master.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); - master.SendStr(task_id); - master.SendStr(std::string(cmd)); + utils::Assert(tracker.SendAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 1"); + utils::Assert(tracker.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); + utils::Check(magic == kMagic, "sync::Invalid tracker message, init failure"); + utils::Assert(tracker.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); + tracker.SendStr(task_id); + tracker.SendStr(std::string(cmd)); {// get new ranks int newrank; - utils::Assert(master.RecvAll(&newrank, sizeof(newrank)) == sizeof(newrank), + utils::Assert(tracker.RecvAll(&newrank, sizeof(newrank)) == sizeof(newrank), "ReConnectLink failure 4"); - utils::Assert(master.RecvAll(&parent_rank, sizeof(parent_rank)) == sizeof(parent_rank), + utils::Assert(tracker.RecvAll(&parent_rank, sizeof(parent_rank)) == sizeof(parent_rank), 
"ReConnectLink failure 4"); - utils::Assert(master.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), + utils::Assert(tracker.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), "ReConnectLink failure 4"); utils::Assert(rank == -1 || newrank == rank, "must keep rank to same if the node already have one"); rank = newrank; @@ -139,7 +139,7 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { utils::Check(port != -1, "ReConnectLink fail to bind the ports specified"); sock_listen.Listen(); - // get number of to connect and number of to accept nodes from master + // get number of to connect and number of to accept nodes from tracker int num_conn, num_accept, num_error = 1; do { // send over good links @@ -152,24 +152,24 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { } } int ngood = static_cast(good_link.size()); - utils::Assert(master.SendAll(&ngood, sizeof(ngood)) == sizeof(ngood), + utils::Assert(tracker.SendAll(&ngood, sizeof(ngood)) == sizeof(ngood), "ReConnectLink failure 5"); for (size_t i = 0; i < good_link.size(); ++i) { - utils::Assert(master.SendAll(&good_link[i], sizeof(good_link[i])) == sizeof(good_link[i]), + utils::Assert(tracker.SendAll(&good_link[i], sizeof(good_link[i])) == sizeof(good_link[i]), "ReConnectLink failure 6"); } - utils::Assert(master.RecvAll(&num_conn, sizeof(num_conn)) == sizeof(num_conn), + utils::Assert(tracker.RecvAll(&num_conn, sizeof(num_conn)) == sizeof(num_conn), "ReConnectLink failure 7"); - utils::Assert(master.RecvAll(&num_accept, sizeof(num_accept)) == sizeof(num_accept), + utils::Assert(tracker.RecvAll(&num_accept, sizeof(num_accept)) == sizeof(num_accept), "ReConnectLink failure 8"); num_error = 0; for (int i = 0; i < num_conn; ++i) { LinkRecord r; int hport, hrank; std::string hname; - master.RecvStr(&hname); - utils::Assert(master.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "ReConnectLink failure 9"); - utils::Assert(master.RecvAll(&hrank, sizeof(hrank)) == sizeof(hrank), 
"ReConnectLink failure 10"); + tracker.RecvStr(&hname); + utils::Assert(tracker.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "ReConnectLink failure 9"); + utils::Assert(tracker.RecvAll(&hrank, sizeof(hrank)) == sizeof(hrank), "ReConnectLink failure 10"); r.sock.Create(); if (!r.sock.Connect(utils::SockAddr(hname.c_str(), hport))) { num_error += 1; r.sock.Close(); continue; @@ -186,12 +186,12 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { } if (!match) links.push_back(r); } - utils::Assert(master.SendAll(&num_error, sizeof(num_error)) == sizeof(num_error), "ReConnectLink failure 14"); + utils::Assert(tracker.SendAll(&num_error, sizeof(num_error)) == sizeof(num_error), "ReConnectLink failure 14"); } while (num_error != 0); - // send back socket listening port to master - utils::Assert(master.SendAll(&port, sizeof(port)) == sizeof(port), "ReConnectLink failure 14"); - // close connection to master - master.Close(); + // send back socket listening port to tracker + utils::Assert(tracker.SendAll(&port, sizeof(port)) == sizeof(port), "ReConnectLink failure 14"); + // close connection to tracker + tracker.Close(); // listen to incoming links for (int i = 0; i < num_accept; ++i) { LinkRecord r; diff --git a/src/allreduce_base.h b/src/allreduce_base.h index e972551f8..6eea948ce 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -260,9 +260,9 @@ class AllreduceBase : public IEngine { std::vector buffer_; }; /*! - * \brief connect to the master to fix the the missing links + * \brief connect to the tracker to fix the the missing links * this function is also used when the engine start up - * \param cmd possible command to sent to master + * \param cmd possible command to sent to tracker */ void ReConnectLinks(const char *cmd = "start"); /*! 
@@ -316,10 +316,10 @@ class AllreduceBase : public IEngine { std::string task_id; // uri of current host, to be set by Init std::string host_uri; - // uri of master - std::string master_uri; - // port of master address - int master_port; + // uri of tracker + std::string tracker_uri; + // port of tracker address + int tracker_port; // port of slave process int slave_port, nport_trial; // reduce buffer size diff --git a/src/rabit_tracker.py b/src/rabit_tracker.py index ea77506be..7ae53fd26 100644 --- a/src/rabit_tracker.py +++ b/src/rabit_tracker.py @@ -1,6 +1,8 @@ """ -Master script for rabit -Implements the master control protocol to start rabit jobs and assign necessary information +Tracker script for rabit +Implements the tracker control protocol + - start rabit jobs + - help nodes to establish links with each other Tianqi Chen """ @@ -128,8 +130,8 @@ class Tracker: def __del__(self): self.sock.close() def slave_args(self): - return ['master_uri=%s' % socket.gethostname(), - 'master_port=%s' % self.port] + return ['rabit_tracker_uri=%s' % socket.gethostname(), + 'rabit_tracker_port=%s' % self.port] def accept_slaves(self, nslave): # set of nodes that finishs the job shutdown = {} diff --git a/test/test.sh b/test/test.sh deleted file mode 100755 index c323785dd..000000000 --- a/test/test.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -if [ "$#" -lt 4 ]; -then - echo "Usage " - exit -1 -fi - -../submit_job.py $1 test_recover "${@:2}" From 0e012cb05e5685677289a3728e4e0a1bde8e12cd Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 6 Dec 2014 11:05:24 -0800 Subject: [PATCH 093/531] add speed test --- src/allreduce_robust.cc | 83 ++++++++++++++++++++++++++++------- src/allreduce_robust.h | 51 ++++++++++++++++------ src/rabit_tracker.py | 2 +- src/timer.h | 23 ++++++++++ test/Makefile | 15 ++++--- test/speed_test.cpp | 96 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 234 insertions(+), 36 deletions(-) create mode 100644 src/timer.h create mode 100644 
test/speed_test.cpp diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 0f30ae1aa..eace31bb6 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -24,12 +24,12 @@ AllreduceRobust::AllreduceRobust(void) { void AllreduceRobust::Shutdown(void) { // need to sync the exec before we shutdown, do a pesudo check point // execute checkpoint, note: when checkpoint existing, load will not happen - utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kMaxSeq), + utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kSpecialOp), "check point must return true"); // reset result buffer resbuf.Clear(); seq_counter = 0; // execute check ack step, load happens here - utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kMaxSeq), + utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp), "check ack must return true"); AllreduceBase::Shutdown(); } @@ -133,7 +133,7 @@ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, utils::ISerializable *local_model) { utils::Check(local_model == NULL, "CheckPoint local_model is not yet supported"); // check if we succesfll - if (RecoverExec(NULL, 0, ActionSummary::kLoadCheck, ActionSummary::kMaxSeq)) { + if (RecoverExec(NULL, 0, ActionSummary::kLoadCheck, ActionSummary::kSpecialOp)) { // reset result buffer resbuf.Clear(); seq_counter = 0; // load from buffer @@ -142,7 +142,7 @@ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, if (version_number == 0) return version_number; global_model->Load(fs); // run another phase of check ack, if recovered from data - utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kMaxSeq), + utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp), "check ack must return true"); return version_number; } else { @@ -172,7 +172,7 @@ void AllreduceRobust::CheckPoint(const utils::ISerializable 
*global_model, const utils::ISerializable *local_model) { utils::Assert(local_model == NULL, "CheckPoint local model is not supported yet"); // execute checkpoint, note: when checkpoint existing, load will not happen - utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kMaxSeq), + utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kSpecialOp), "check point must return true"); // this is the critical region where we will change all the stored models // increase version number @@ -185,7 +185,7 @@ void AllreduceRobust::CheckPoint(const utils::ISerializable *global_model, // reset result buffer resbuf.Clear(); seq_counter = 0; // execute check ack step, load happens here - utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kMaxSeq), + utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp), "check ack must return true"); } /*! @@ -608,6 +608,10 @@ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) { */ AllreduceRobust::ReturnType AllreduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { RecoverType role; + // if minimum sequence requested is local check point ack, + // this means all nodes have finished local check point, directly return + if (seqno == ActionSummary::kLocalCheckAck) return kSuccess; + if (!requester) { sendrecvbuf = resbuf.Query(seqno, &size); role = sendrecvbuf != NULL ? 
kHaveData : kPassData; @@ -631,7 +635,7 @@ AllreduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool re * \param size the total size of the buffer * \param flag flag information about the action \sa ActionSummary * \param seqno sequence number of the action, if it is special action with flag set, - * seqno needs to be set to ActionSummary::kMaxSeq + * seqno needs to be set to ActionSummary::kSpecialOp * * \return if this function can return true or false * - true means buf already set to the @@ -640,7 +644,7 @@ AllreduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool re */ bool AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { if (flag != 0) { - utils::Assert(seqno == ActionSummary::kMaxSeq, "must only set seqno for normal operations"); + utils::Assert(seqno == ActionSummary::kSpecialOp, "must only set seqno for normal operations"); } // request ActionSummary req(flag, seqno); @@ -672,7 +676,7 @@ bool AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { } else { if (act.check_point()) { if (act.diff_seq()) { - utils::Assert(act.min_seqno() != ActionSummary::kMaxSeq, "min seq bug"); + utils::Assert(act.min_seqno() != ActionSummary::kSpecialOp, "min seq bug"); bool requester = req.min_seqno() == act.min_seqno(); if (!CheckAndRecover(TryGetResult(buf, size, act.min_seqno(), requester))) continue; if (requester) return true; @@ -691,7 +695,7 @@ bool AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { if (req.load_check()) return true; } else { // no special flags, no checkpoint, check ack, load_check - utils::Assert(act.min_seqno() != ActionSummary::kMaxSeq, "min seq bug"); + utils::Assert(act.min_seqno() != ActionSummary::kSpecialOp, "min seq bug"); if (act.diff_seq()) { bool requester = req.min_seqno() == act.min_seqno(); if (!CheckAndRecover(TryGetResult(buf, size, act.min_seqno(), requester))) continue; @@ -708,7 +712,54 @@ bool 
AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { utils::Assert(false, "RecoverExec: should not reach here"); return true; } - +/*! + * \brief try to recover the local state, making each local state to be the result of itself + * plus replication of states in previous num_local_replica hops in the ring + * + * The input parameters must contain the valid local states available in current nodes, + * This function try ist best to "complete" the missing parts of local_rptr and local_chkpt + * If there is sufficient information in the ring, when the function returns, local_chkpt will + * contain num_local_replica + 1 checkpoints (including the chkpt of this node) + * If there is no sufficient information in the ring, this function the number of checkpoints + * will be less than the specified value + * + * \param p_local_rptr the pointer to the segment pointers in the states array + * \param p_local_chkpt the pointer to the storage of local check points + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType + */ +AllreduceRobust::ReturnType +AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, + std::string *p_local_chkpt) { + // if there is no local replica, we can do nothing + if (num_local_replica == 0) return kSuccess; + std::vector &rptr = *p_local_rptr; + std::string &chkpt = *p_local_chkpt; + if (rptr.size() == 0) { + rptr.push_back(0); + utils::Assert(chkpt.length() == 0, "local chkpt space inconsistent"); + } + const int n = num_local_replica; + // message send to previous link + { + int msg_forward[2]; + int nlocal = static_cast(rptr.size() - 1); + msg_forward[0] = nlocal; + utils::Assert(msg_forward[0] <= n, "invalid local replica"); + // backward passing one hop the request + ReturnType succ = RingPassing(msg_forward, + 1 * sizeof(int), 2 * sizeof(int), + 0 * sizeof(int), 1 * sizeof(int), + ring_prev, ring_next); + if (succ != kSuccess) return succ; + // check 
how much current node can help with the request + // if (nlocal > ) { + + //} + + } + return kSuccess; +} /*! * \brief perform a ring passing to receive data from prev link, and sent data to next link * this allows data to stream over a ring structure @@ -723,8 +774,8 @@ bool AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { * \param read_end the ending position to read * \param write_ptr the initial write pointer * \param write_end the ending position to write - * \param prev pointer to link to previous position in ring - * \param prev pointer to link of next position in ring + * \param read_link pointer to link to previous position in ring + * \param write_link pointer to link of next position in ring */ AllreduceRobust::ReturnType AllreduceRobust::RingPassing(void *sendrecvbuf_, @@ -732,14 +783,14 @@ AllreduceRobust::RingPassing(void *sendrecvbuf_, size_t read_end, size_t write_ptr, size_t write_end, - LinkRecord *prev_link, - LinkRecord *next_link) { + LinkRecord *read_link, + LinkRecord *write_link) { if (links.size() == 0 || read_end == 0) return kSuccess; utils::Assert(read_end <= write_end, "boundary check"); utils::Assert(read_ptr <= read_end, "boundary check"); utils::Assert(write_ptr <= write_end, "boundary check"); // take reference - LinkRecord &prev = *prev_link, &next = *next_link; + LinkRecord &prev = *read_link, &next = *write_link; // send recv buffer char *buf = reinterpret_cast(sendrecvbuf_); while (true) { diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index 2f183ef13..570960e52 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -122,7 +122,11 @@ class AllreduceRobust : public AllreduceBase { */ struct ActionSummary { // maximumly allowed sequence id - const static int kMaxSeq = 1 << 26; + const static int kSpecialOp = (1 << 26); + // special sequence number for local state checkpoint + const static int kLocalCheckPoint = (1 << 26) - 2; + // special sequnce number for local state 
checkpoint ack signal + const static int kLocalCheckAck = (1 << 26) - 1; //--------------------------------------------- // The following are bit mask of flag used in //---------------------------------------------- @@ -140,7 +144,7 @@ class AllreduceRobust : public AllreduceBase { // constructor ActionSummary(void) {} // constructor of action - ActionSummary(int flag, int minseqno = kMaxSeq) { + ActionSummary(int flag, int minseqno = kSpecialOp) { seqcode = (minseqno << 4) | flag; } // minimum number of all operations @@ -277,14 +281,14 @@ class AllreduceRobust : public AllreduceBase { * \param buf the buffer to store the result * \param size the total size of the buffer * \param flag flag information about the action \sa ActionSummary - * \param seqno sequence number of the action, if it is special action with flag set, seqno needs to be set to ActionSummary::kMaxSeq + * \param seqno sequence number of the action, if it is special action with flag set, seqno needs to be set to ActionSummary::kSpecialOp * * \return if this function can return true or false * - true means buf already set to the * result by recovering procedure, the action is complete, no further action is needed * - false means this is the lastest action that has not yet been executed, need to execute the action */ - bool RecoverExec(void *buf, size_t size, int flag, int seqno = ActionSummary::kMaxSeq); + bool RecoverExec(void *buf, size_t size, int flag, int seqno = ActionSummary::kSpecialOp); /*! * \brief try to load check point * @@ -344,12 +348,30 @@ class AllreduceRobust : public AllreduceBase { * * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details * \sa ReturnType, TryDecideRouting - */ + */ ReturnType TryRecoverData(RecoverType role, void *sendrecvbuf_, size_t size, int recv_link, const std::vector &req_in); + /*! 
+ * \brief try to recover the local state, making each local state to be the result of itself + * plus replication of states in previous num_local_replica hops in the ring + * + * The input parameters must contain the valid local states available in current nodes, + * This function try ist best to "complete" the missing parts of local_rptr and local_chkpt + * If there is sufficient information in the ring, when the function returns, local_chkpt will + * contain num_local_replica + 1 checkpoints (including the chkpt of this node) + * If there is no sufficient information in the ring, this function the number of checkpoints + * will be less than the specified value + * + * \param p_local_rptr the pointer to the segment pointers in the states array + * \param p_local_chkpt the pointer to the storage of local check points + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType + */ + ReturnType TryRecoverLocalState(std::vector *p_local_rptr, + std::string *p_local_chkpt); /*! * \brief perform a ring passing to receive data from prev link, and sent data to next link * this allows data to stream over a ring structure @@ -364,16 +386,16 @@ class AllreduceRobust : public AllreduceBase { * \param read_end the ending position to read * \param write_ptr the initial write pointer * \param write_end the ending position to write - * \param prev pointer to link to previous position in ring - * \param prev pointer to link of next position in ring + * \param read_link pointer to link to previous position in ring + * \param write_link pointer to link of next position in ring */ ReturnType RingPassing(void *senrecvbuf_, size_t read_ptr, size_t read_end, size_t write_ptr, size_t write_end, - LinkRecord *prev_link, - LinkRecord *next_link); + LinkRecord *read_link, + LinkRecord *write_link); /*! 
* \brief run message passing algorithm on the allreduce tree * the result is edge message stored in p_edge_in and p_edge_out @@ -410,15 +432,18 @@ class AllreduceRobust : public AllreduceBase { std::string global_checkpoint; // number of replica for local state/model int num_local_replica; + // --- recovery data structure for local checkpoint + // there is two version of the data structure, + // at one time one version is valid and another is used as temp memory // pointer to memory position in the local model // local model is stored in CSR format(like a sparse matrices) // local_model[rptr[0]:rptr[1]] stores the model of current node // local_model[rptr[k]:rptr[k+1]] stores the model of node in previous k hops in the ring - std::vector local_rptr; + std::vector local_rptr[2]; // storage for local model replicas - std::string local_checkpoint; - // temporal storage for doing local checkpointing - std::string tmp_local_check; + std::string local_checkpoint[2]; + // version of local checkpoint can be 1 or 0 + int local_chkpt_version; }; } // namespace engine } // namespace rabit diff --git a/src/rabit_tracker.py b/src/rabit_tracker.py index 7ae53fd26..ceda6347f 100644 --- a/src/rabit_tracker.py +++ b/src/rabit_tracker.py @@ -166,7 +166,7 @@ class Tracker: s.assign_rank(rank, wait_conn, nslave) if s.wait_accept > 0: wait_conn[rank] = s - print 'all slaves setup complete' + print 'All nodes finishes job' def mpi_submit(nslave, args): cmd = ' '.join(['mpirun -n %d' % nslave] + args) diff --git a/src/timer.h b/src/timer.h new file mode 100644 index 000000000..c0c83c1c8 --- /dev/null +++ b/src/timer.h @@ -0,0 +1,23 @@ +/*! + * \file timer.h + * \brief This file defines the utils for timing + * \author Tianqi Chen, Nacho, Tianyi + */ +#ifndef RABIT_TIMER_H +#define RABIT_TIMER_H +#include +#include "./utils.h" + +namespace rabit { +namespace utils { +/*! 
+ * \brief return time in seconds + */ +inline double GetTime(void) { + timespec ts; + utils::Check(clock_gettime(CLOCK_REALTIME, &ts) == 0, "failed to get time"); + return static_cast(ts.tv_sec) + static_cast(ts.tv_nsec) * 1e-9; +} +} +} +#endif diff --git a/test/Makefile b/test/Makefile index bd14fff97..f10229a1f 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,17 +1,17 @@ export CC = gcc export CXX = g++ export MPICXX = mpicxx -export LDFLAGS= -pthread -lm +export LDFLAGS= -pthread -lm -lrt export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src # specify tensor path -BIN = test_allreduce test_recover test_model_recover +BIN = test_allreduce test_recover test_model_recover speed_test # objectives that makes up rabit library RABIT_OBJ = allreduce_base.o allreduce_robust.o engine.o MPIOBJ = engine_mpi.o -OBJ = $(RABIT_OBJ) test_allreduce.o test_recover.o test_model_recover.o -MPIBIN = test_allreduce.mpi +OBJ = $(RABIT_OBJ) test_allreduce.o test_recover.o test_model_recover.o speed_test.o +MPIBIN = test_allreduce.mpi speed_test.mpi .PHONY: clean all all: $(BIN) $(MPIBIN) @@ -21,23 +21,26 @@ engine.o: ../src/engine.cc ../src/*.h allreduce_robust.o: ../src/allreduce_robust.cc ../src/*.h engine_mpi.o: ../src/engine_mpi.cc test_allreduce.o: test_allreduce.cpp ../src/*.h +speed_test.o: speed_test.cpp ../src/*.h test_recover.o: test_recover.cpp ../src/*.h test_model_recover.o: test_model_recover.cpp ../src/*.h # we can link against MPI version to get use MPI test_allreduce: test_allreduce.o $(RABIT_OBJ) test_allreduce.mpi: test_allreduce.o $(MPIOBJ) +speed_test: speed_test.o $(RABIT_OBJ) +speed_test.mpi: speed_test.o $(MPIOBJ) test_recover: test_recover.o $(RABIT_OBJ) test_model_recover: test_model_recover.o $(RABIT_OBJ) $(BIN) : - $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) + $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) $(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) $(MPIBIN) : 
- $(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) + $(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) $(MPIOBJ) : $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) diff --git a/test/speed_test.cpp b/test/speed_test.cpp new file mode 100644 index 000000000..25033a54a --- /dev/null +++ b/test/speed_test.cpp @@ -0,0 +1,96 @@ +#include +#include +#include +#include +#include +#include +#include + +using namespace rabit; + +double max_tdiff, sum_tdiff, bcast_tdiff, tot_tdiff; + +inline void TestMax(size_t n) { + int rank = rabit::GetRank(); + //int nproc = rabit::GetWorldSize(); + + std::vector ndata(n); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % 111; + } + double tstart = utils::GetTime(); + rabit::Allreduce(&ndata[0], ndata.size()); + max_tdiff += utils::GetTime() - tstart; +} + +inline void TestSum(size_t n) { + int rank = rabit::GetRank(); + //int nproc = rabit::GetWorldSize(); + const int z = 131; + std::vector ndata(n); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % z; + } + double tstart = utils::GetTime(); + rabit::Allreduce(&ndata[0], ndata.size()); + sum_tdiff += utils::GetTime() - tstart; +} + +inline void TestBcast(size_t n, int root) { + int rank = rabit::GetRank(); + std::string s; s.resize(n); + for (size_t i = 0; i < n; ++i) { + s[i] = char(i % 126 + 1); + } + std::string res; + if (root == rank) { + res = s; + } + double tstart = utils::GetTime(); + rabit::Broadcast(&res, root); + bcast_tdiff += utils::GetTime() - tstart; +} + +inline void PrintStats(const char *name, double tdiff) { + int nproc = rabit::GetWorldSize(); + double tsum = tdiff; + rabit::Allreduce(&tsum, 1); + double tavg = tsum / nproc; + double tsqr = tdiff - tavg; + tsqr *= tsqr; + rabit::Allreduce(&tsqr, 1); + double tstd = sqrt(tsqr / nproc); + if (rabit::GetRank() == 0) { + utils::LogPrintf("%s: mean=%g, std=%g sec\n", name, tavg, tstd); + } +} + +int main(int 
argc, char *argv[]) { + if (argc < 3) { + printf("Usage: \n"); + return 0; + } + srand(0); + int n = atoi(argv[1]); + int nrep = atoi(argv[2]); + utils::Check(nrep >= 1, "need to at least repeat running once"); + rabit::Init(argc, argv); + //int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + std::string name = rabit::GetProcessorName(); + max_tdiff = sum_tdiff = bcast_tdiff = 0; + double tstart = utils::GetTime(); + for (int i = 0; i < nrep; ++i) { + TestMax(n); + TestSum(n); + TestBcast(n, rand() % nproc); + } + tot_tdiff = utils::GetTime() - tstart; + // use allreduce to get the sum and std of time + PrintStats("max_tdiff", max_tdiff); + PrintStats("sum_tdiff", sum_tdiff); + PrintStats("bcast_tdiff", bcast_tdiff); + PrintStats("tot_tdiff", tot_tdiff); + rabit::Finalize(); + return 0; +} From 1519f74f3cae68ea76e107356e93b334f0b1bc1a Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 6 Dec 2014 11:20:52 -0800 Subject: [PATCH 094/531] ok --- test/speed_test.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/speed_test.cpp b/test/speed_test.cpp index 25033a54a..d171c7dc1 100644 --- a/test/speed_test.cpp +++ b/test/speed_test.cpp @@ -51,7 +51,7 @@ inline void TestBcast(size_t n, int root) { bcast_tdiff += utils::GetTime() - tstart; } -inline void PrintStats(const char *name, double tdiff) { +inline void PrintStats(const char *name, double tdiff, int n, int nrep, size_t size) { int nproc = rabit::GetWorldSize(); double tsum = tdiff; rabit::Allreduce(&tsum, 1); @@ -62,6 +62,11 @@ inline void PrintStats(const char *name, double tdiff) { double tstd = sqrt(tsqr / nproc); if (rabit::GetRank() == 0) { utils::LogPrintf("%s: mean=%g, std=%g sec\n", name, tavg, tstd); + double ndata = n; + ndata *= nrep * size; + if (n != 0) { + utils::LogPrintf("%s-speed: %g MB/sec\n", name, (ndata / tavg) / 1024 / 1024 ); + } } } @@ -87,10 +92,10 @@ int main(int argc, char *argv[]) { } tot_tdiff = utils::GetTime() - tstart; // use 
allreduce to get the sum and std of time - PrintStats("max_tdiff", max_tdiff); - PrintStats("sum_tdiff", sum_tdiff); - PrintStats("bcast_tdiff", bcast_tdiff); - PrintStats("tot_tdiff", tot_tdiff); + PrintStats("max_tdiff", max_tdiff, n, nrep, sizeof(float)); + PrintStats("sum_tdiff", sum_tdiff, n, nrep, sizeof(float)); + PrintStats("bcast_tdiff", bcast_tdiff, n, nrep, sizeof(char)); + PrintStats("tot_tdiff", tot_tdiff, 0, nrep, sizeof(float)); rabit::Finalize(); return 0; } From 4a7d84e861eca44d3505fcaffe1f1317873fb9a0 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 6 Dec 2014 11:25:08 -0800 Subject: [PATCH 095/531] chg string bcast --- test/speed_test.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/speed_test.cpp b/test/speed_test.cpp index d171c7dc1..c4410a562 100644 --- a/test/speed_test.cpp +++ b/test/speed_test.cpp @@ -38,16 +38,17 @@ inline void TestSum(size_t n) { inline void TestBcast(size_t n, int root) { int rank = rabit::GetRank(); - std::string s; s.resize(n); + std::string s; s.resize(n); for (size_t i = 0; i < n; ++i) { s[i] = char(i % 126 + 1); } - std::string res; + std::string res; + res.resize(n); if (root == rank) { res = s; } double tstart = utils::GetTime(); - rabit::Broadcast(&res, root); + rabit::Broadcast(&res[0], res.length(), root); bcast_tdiff += utils::GetTime() - tstart; } From 14e400226ad50a262b388f256d8bfd619ee09a7b Mon Sep 17 00:00:00 2001 From: nachocano Date: Sat, 6 Dec 2014 11:33:05 -0800 Subject: [PATCH 096/531] submit mpi to include machine file --- submit_mpi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/submit_mpi.py b/submit_mpi.py index 14eafff8c..83c97175e 100755 --- a/submit_mpi.py +++ b/submit_mpi.py @@ -24,13 +24,13 @@ def mpi_submit(nslave, args): args arguments to launch each job this usually includes the parameters of master_uri and parameters passed into submit """ - cmd = ' '.join(['mpirun -n %d' % nslave] + args) + cmd = ' '.join(['mpirun -n %d 
-machinefile %s' % (nslave, args[0])] + args[1:]) print cmd subprocess.check_call(cmd, shell = True) if __name__ == '__main__': if len(sys.argv) < 2: - print 'Usage: ' + print 'Usage: ' exit(0) # call submit, with nslave, the commands to run each job and submit function tracker.submit(int(sys.argv[1]), sys.argv[2:], fun_submit= mpi_submit) From f203d13efca7dd7e717544c6b9ed2d2743f14536 Mon Sep 17 00:00:00 2001 From: nachocano Date: Sat, 6 Dec 2014 11:59:16 -0800 Subject: [PATCH 097/531] speed runner --- test/speed_runner.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 test/speed_runner.py diff --git a/test/speed_runner.py b/test/speed_runner.py new file mode 100644 index 000000000..eba97c681 --- /dev/null +++ b/test/speed_runner.py @@ -0,0 +1,24 @@ +import os +import argparse + +def main(): + parser = argparse.ArgumentParser(description='TODO') + parser.add_argument('-h', '--host_dir', required=True) + parser.add_argument('-s', '--submit_script', required=True) + args = parser.parse_args() + + ndata = [10^4, 10^5, 10^6, 10^7, 10^8] + nrepeat = [10^2, 10^3, 10^4, 10^5] + + machines = [2,4,8,16,31] + + for data in ndata: + for repeat in nrepeat: + for machine in machines: + host_file = os.path.join(args.host_dir, 'host%d' % machine) + cmd = 'python %s %d %s %d %d' % (args.submit_script, machine, host_file, data, repeat) + print 'data=%d, repeat=%d, machine=%d' % (data, repeat, machine) + os.system(cmd) + +if __name__ == "__main__": + main() \ No newline at end of file From 8f0d7d1d3ef793777f238be789e742f8d68166ef Mon Sep 17 00:00:00 2001 From: nachocano Date: Sat, 6 Dec 2014 12:01:05 -0800 Subject: [PATCH 098/531] changing to -ho not to conflict with help --- test/speed_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/speed_runner.py b/test/speed_runner.py index eba97c681..94d4f2d87 100644 --- a/test/speed_runner.py +++ b/test/speed_runner.py @@ -3,7 +3,7 @@ import argparse def main(): parser = 
argparse.ArgumentParser(description='TODO') - parser.add_argument('-h', '--host_dir', required=True) + parser.add_argument('-ho', '--host_dir', required=True) parser.add_argument('-s', '--submit_script', required=True) args = parser.parse_args() From e0053c62e1e18bfef85c9e4db55be3d6e4b909df Mon Sep 17 00:00:00 2001 From: nachocano Date: Sat, 6 Dec 2014 12:05:08 -0800 Subject: [PATCH 099/531] adding executable --- test/speed_runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/speed_runner.py b/test/speed_runner.py index 94d4f2d87..8dcae7f9a 100644 --- a/test/speed_runner.py +++ b/test/speed_runner.py @@ -5,6 +5,7 @@ def main(): parser = argparse.ArgumentParser(description='TODO') parser.add_argument('-ho', '--host_dir', required=True) parser.add_argument('-s', '--submit_script', required=True) + parser.add_argument('-ex', '--executable', required=True) args = parser.parse_args() ndata = [10^4, 10^5, 10^6, 10^7, 10^8] @@ -16,7 +17,7 @@ def main(): for repeat in nrepeat: for machine in machines: host_file = os.path.join(args.host_dir, 'host%d' % machine) - cmd = 'python %s %d %s %d %d' % (args.submit_script, machine, host_file, data, repeat) + cmd = 'python %s %d %s %s %d %d' % (args.submit_script, machine, host_file, args.executable, data, repeat) print 'data=%d, repeat=%d, machine=%d' % (data, repeat, machine) os.system(cmd) From 9ed59e71f632eec793a82ddd7c94d3df93caabec Mon Sep 17 00:00:00 2001 From: nachocano Date: Sat, 6 Dec 2014 12:09:40 -0800 Subject: [PATCH 100/531] speed runner --- test/speed_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/speed_runner.py b/test/speed_runner.py index 8dcae7f9a..8e6bfa404 100644 --- a/test/speed_runner.py +++ b/test/speed_runner.py @@ -16,7 +16,7 @@ def main(): for data in ndata: for repeat in nrepeat: for machine in machines: - host_file = os.path.join(args.host_dir, 'host%d' % machine) + host_file = os.path.join(args.host_dir, 'hosts%d' % machine) cmd = 'python 
%s %d %s %s %d %d' % (args.submit_script, machine, host_file, args.executable, data, repeat) print 'data=%d, repeat=%d, machine=%d' % (data, repeat, machine) os.system(cmd) From 52d472c209054149975f891dfe6e9372ec1ebc51 Mon Sep 17 00:00:00 2001 From: root Date: Sat, 6 Dec 2014 20:30:35 +0000 Subject: [PATCH 101/531] using hostfile --- submit_mpi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/submit_mpi.py b/submit_mpi.py index 83c97175e..3a65ec440 100755 --- a/submit_mpi.py +++ b/submit_mpi.py @@ -24,7 +24,7 @@ def mpi_submit(nslave, args): args arguments to launch each job this usually includes the parameters of master_uri and parameters passed into submit """ - cmd = ' '.join(['mpirun -n %d -machinefile %s' % (nslave, args[0])] + args[1:]) + cmd = ' '.join(['mpirun -n %d --hostfile %s' % (nslave, args[0])] + args[1:]) print cmd subprocess.check_call(cmd, shell = True) From 659b9cd517313757597484d7f3e0301929a019a9 Mon Sep 17 00:00:00 2001 From: nachocano Date: Sat, 6 Dec 2014 15:14:14 -0800 Subject: [PATCH 102/531] changing number of repetitions --- test/speed_runner.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/test/speed_runner.py b/test/speed_runner.py index 8e6bfa404..a8951fc66 100644 --- a/test/speed_runner.py +++ b/test/speed_runner.py @@ -8,18 +8,17 @@ def main(): parser.add_argument('-ex', '--executable', required=True) args = parser.parse_args() - ndata = [10^4, 10^5, 10^6, 10^7, 10^8] - nrepeat = [10^2, 10^3, 10^4, 10^5] + ndata = [10**4, 10**5, 10**6, 10**7] + nrepeat = [10**4, 10**3, 10**2, 10] machines = [2,4,8,16,31] - for data in ndata: - for repeat in nrepeat: - for machine in machines: - host_file = os.path.join(args.host_dir, 'hosts%d' % machine) - cmd = 'python %s %d %s %s %d %d' % (args.submit_script, machine, host_file, args.executable, data, repeat) - print 'data=%d, repeat=%d, machine=%d' % (data, repeat, machine) - os.system(cmd) + for i, data in enumerate(ndata): + for machine 
in machines: + host_file = os.path.join(args.host_dir, 'hosts%d' % machine) + cmd = 'python %s %d %s %s %d %d' % (args.submit_script, machine, host_file, args.executable, data, nrepeat[i]) + print 'data=%d, repeat=%d, machine=%d' % (data, nrepeat[i], machine) + os.system(cmd) if __name__ == "__main__": main() \ No newline at end of file From fcf2f0a03d8c928af3f142a10e2a1f8e135853af Mon Sep 17 00:00:00 2001 From: nachocano Date: Sat, 6 Dec 2014 15:22:29 -0800 Subject: [PATCH 103/531] to stderr --- test/speed_runner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/speed_runner.py b/test/speed_runner.py index a8951fc66..a5c459d82 100644 --- a/test/speed_runner.py +++ b/test/speed_runner.py @@ -1,5 +1,6 @@ import os import argparse +import sys def main(): parser = argparse.ArgumentParser(description='TODO') @@ -17,7 +18,8 @@ def main(): for machine in machines: host_file = os.path.join(args.host_dir, 'hosts%d' % machine) cmd = 'python %s %d %s %s %d %d' % (args.submit_script, machine, host_file, args.executable, data, nrepeat[i]) - print 'data=%d, repeat=%d, machine=%d' % (data, nrepeat[i], machine) + sys.stderr.write('data=%d, repeat=%d, machine=%d\n' % (data, nrepeat[i], machine)) + sys.stderr.flush() os.system(cmd) if __name__ == "__main__": From 20b03e781c127450ace0d779f0c65a027311d9ab Mon Sep 17 00:00:00 2001 From: nachocano Date: Sat, 6 Dec 2014 15:37:09 -0800 Subject: [PATCH 104/531] to run all executables --- test/speed_runner.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/test/speed_runner.py b/test/speed_runner.py index a5c459d82..7331c9075 100644 --- a/test/speed_runner.py +++ b/test/speed_runner.py @@ -6,7 +6,8 @@ def main(): parser = argparse.ArgumentParser(description='TODO') parser.add_argument('-ho', '--host_dir', required=True) parser.add_argument('-s', '--submit_script', required=True) - parser.add_argument('-ex', '--executable', required=True) + parser.add_argument('-rex', 
'--rabit_exec', required=True) + parser.add_argument('-mpi', '--mpi_exec', required=True) args = parser.parse_args() ndata = [10**4, 10**5, 10**6, 10**7] @@ -14,13 +15,20 @@ def main(): machines = [2,4,8,16,31] - for i, data in enumerate(ndata): - for machine in machines: - host_file = os.path.join(args.host_dir, 'hosts%d' % machine) - cmd = 'python %s %d %s %s %d %d' % (args.submit_script, machine, host_file, args.executable, data, nrepeat[i]) - sys.stderr.write('data=%d, repeat=%d, machine=%d\n' % (data, nrepeat[i], machine)) - sys.stderr.flush() - os.system(cmd) + executables = [args.rabit_exec, args.mpi_exec] + + for executable in executables: + sys.stderr.write('Executable %s' % executable) + sys.stderr.flush() + for i, data in enumerate(ndata): + for machine in machines: + host_file = os.path.join(args.host_dir, 'hosts%d' % machine) + cmd = 'python %s %d %s %s %d %d' % (args.submit_script, machine, host_file, executable, data, nrepeat[i]) + sys.stderr.write('data=%d, repeat=%d, machine=%d\n' % (data, nrepeat[i], machine)) + sys.stderr.flush() + os.system(cmd) + sys.stderr.write('\n') + sys.stderr.flush() if __name__ == "__main__": main() \ No newline at end of file From 328cf187bad60f8f04e1e872b79215beb89e8828 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 6 Dec 2014 23:00:10 -0800 Subject: [PATCH 105/531] check in the ring passing --- src/allreduce_robust.cc | 128 +++++++++++++++++++++++++++++++++++----- 1 file changed, 113 insertions(+), 15 deletions(-) diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index eace31bb6..dbb318c33 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -740,23 +740,121 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, utils::Assert(chkpt.length() == 0, "local chkpt space inconsistent"); } const int n = num_local_replica; - // message send to previous link - { - int msg_forward[2]; - int nlocal = static_cast(rptr.size() - 1); - msg_forward[0] = nlocal; - 
utils::Assert(msg_forward[0] <= n, "invalid local replica"); + {// backward passing, passing state in backward direction of the ring + const int nlocal = static_cast(rptr.size() - 1); + utils::Assert(nlocal <= n + 1, "invalid local replica"); + std::vector msg_back(n + 1); + msg_back[0] = nlocal; // backward passing one hop the request - ReturnType succ = RingPassing(msg_forward, - 1 * sizeof(int), 2 * sizeof(int), - 0 * sizeof(int), 1 * sizeof(int), - ring_prev, ring_next); + ReturnType succ; + succ = RingPassing(BeginPtr(msg_back), + 1 * sizeof(int), (n+1) * sizeof(int), + 0 * sizeof(int), n * sizeof(int), + ring_next, ring_prev); if (succ != kSuccess) return succ; - // check how much current node can help with the request - // if (nlocal > ) { - - //} - + int msg_forward[2]; + msg_forward[0] = nlocal; + succ = RingPassing(msg_forward, + 1 * sizeof(int), 2 * sizeof(int), + 0 * sizeof(int), 1 * sizeof(int), + ring_prev, ring_next); + if (succ != kSuccess) return succ; + // calculate the number of things we can read from next link + int nread_end = nlocal; + for (int i = 1; i <= n; ++i) { + nread_end = std::max(nread_end, msg_back[i] - i); + } + // gives the size of forward + int nwrite_start = std::min(msg_forward[1] + 1, nread_end); + // get the size of each segments + std::vector sizes(nread_end); + for (int i = 0; i < nlocal; ++i) { + sizes[i] = rptr[i + 1] - rptr[i]; + } + // pass size through the link + succ = RingPassing(BeginPtr(sizes), + nlocal * sizeof(size_t), + nread_end * sizeof(size_t), + nwrite_start * sizeof(size_t), + nread_end * sizeof(size_t), + ring_next, ring_prev); + if (succ != kSuccess) return succ; + // update rptr + rptr.resize(nread_end + 1); + for (int i = nlocal; i < nread_end; ++i) { + rptr[i + 1] = rptr[i] + sizes[i]; + } + chkpt.resize(rptr.back()); + // pass data through the link + succ = RingPassing(&chkpt[0], rptr[nlocal], rptr[nread_end], + rptr[nwrite_start], rptr[nread_end], + ring_next, ring_prev); + if (succ != kSuccess) { + 
rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ; + } + } + {// forward passing, passing state in forward direction of the ring + const int nlocal = static_cast(rptr.size() - 1); + utils::Assert(nlocal <= n + 1, "invalid local replica"); + std::vector msg_forward(n + 1); + msg_forward[0] = nlocal; + // backward passing one hop the request + ReturnType succ; + succ = RingPassing(BeginPtr(msg_forward), + 1 * sizeof(int), (n+1) * sizeof(int), + 0 * sizeof(int), n * sizeof(int), + ring_prev, ring_next); + if (succ != kSuccess) return succ; + int msg_back[2]; + msg_back[0] = nlocal; + succ = RingPassing(msg_back, + 1 * sizeof(int), 2 * sizeof(int), + 0 * sizeof(int), 1 * sizeof(int), + ring_next, ring_prev); + if (succ != kSuccess) return succ; + // calculate the number of things we can read from next link + int nread_end = nlocal, nwrite_end = 1; + // have to have itself in order to get other data from prev link + if (nlocal != 0) { + for (int i = 1; i <= n; ++i) { + if (msg_forward[i] == 0) break; + nread_end = std::max(nread_end, i + 1); + nwrite_end = i + 1; + } + if (nwrite_end > n) nwrite_end = n; + } else { + nread_end = 0; nwrite_end = 0; + } + // gives the size of forward + int nwrite_start = std::min(msg_back[1] - 1, nwrite_end); + // next node miss the state of itself, cannot recover + if (nwrite_start < 0) nwrite_start = nwrite_end = 0; + // get the size of each segments + std::vector sizes(nread_end); + for (int i = 0; i < nlocal; ++i) { + sizes[i] = rptr[i + 1] - rptr[i]; + } + // pass size through the link, check consistency + succ = RingPassing(BeginPtr(sizes), + nlocal * sizeof(size_t), + nread_end * sizeof(size_t), + nwrite_start * sizeof(size_t), + nwrite_end * sizeof(size_t), + ring_prev, ring_next); + if (succ != kSuccess) return succ; + // update rptr + rptr.resize(nread_end + 1); + for (int i = nlocal; i < nread_end; ++i) { + rptr[i + 1] = rptr[i] + sizes[i]; + } + chkpt.resize(rptr.back()); + // pass data through the link + succ = 
RingPassing(&chkpt[0], rptr[nlocal], rptr[nread_end], + rptr[nwrite_start], rptr[nwrite_end], + ring_prev, ring_next); + if (succ != kSuccess) { + rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ; + } } return kSuccess; } From e2adce1cc13aa454bbbc3b74790d530d3bb9e681 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 7 Dec 2014 16:09:28 -0800 Subject: [PATCH 106/531] add ring setup version --- src/allreduce_base.cc | 88 ++++++++++----- src/allreduce_base.h | 21 +++- src/allreduce_robust-inl.h | 1 + src/allreduce_robust.cc | 222 +++++++++++++++++++++++++++++-------- src/allreduce_robust.h | 19 +++- src/rabit_tracker.py | 68 +++++++++--- src/socket.h | 3 +- src/utils.h | 8 ++ submit_mpi.py | 5 +- 9 files changed, 334 insertions(+), 101 deletions(-) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 90a32dbee..72fa12e79 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -7,6 +7,7 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX +#include #include #include #include "./allreduce_base.h" @@ -43,17 +44,18 @@ void AllreduceBase::Init(void) { } // start socket utils::Socket::Startup(); - utils::Assert(links.size() == 0, "can only call Init once"); + utils::Assert(all_links.size() == 0, "can only call Init once"); this->host_uri = utils::SockAddr::GetHostName(); // get information from tracker this->ReConnectLinks(); } void AllreduceBase::Shutdown(void) { - for (size_t i = 0; i < links.size(); ++i) { - links[i].sock.Close(); + for (size_t i = 0; i < all_links.size(); ++i) { + all_links[i].sock.Close(); } - links.clear(); + all_links.clear(); + tree_links.plinks.clear(); if (tracker_uri == "NULL") return; int magic = kMagic; @@ -121,8 +123,12 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { utils::Assert(tracker.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); tracker.SendStr(task_id); tracker.SendStr(std::string(cmd)); + // the rank of previous link, next link in 
ring + int prev_rank, next_rank; + // the rank of neighbors + std::map tree_neighbors; {// get new ranks - int newrank; + int newrank, num_neighbors; utils::Assert(tracker.RecvAll(&newrank, sizeof(newrank)) == sizeof(newrank), "ReConnectLink failure 4"); utils::Assert(tracker.RecvAll(&parent_rank, sizeof(parent_rank)) == sizeof(parent_rank), @@ -130,8 +136,20 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { utils::Assert(tracker.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), "ReConnectLink failure 4"); utils::Assert(rank == -1 || newrank == rank, "must keep rank to same if the node already have one"); - rank = newrank; - } + rank = newrank; + utils::Assert(tracker.RecvAll(&num_neighbors, sizeof(num_neighbors)) == sizeof(num_neighbors), + "ReConnectLink failure 4"); + for (int i = 0; i < num_neighbors; ++i) { + int nrank; + utils::Assert(tracker.RecvAll(&nrank, sizeof(nrank)) == sizeof(nrank), + "ReConnectLink failure 4"); + tree_neighbors[nrank] = 1; + } + utils::Assert(tracker.RecvAll(&prev_rank, sizeof(prev_rank)) == sizeof(prev_rank), + "ReConnectLink failure 4"); + utils::Assert(tracker.RecvAll(&next_rank, sizeof(next_rank)) == sizeof(next_rank), + "ReConnectLink failure 4"); + } // create listening socket utils::TCPSocket sock_listen; sock_listen.Create(); @@ -144,11 +162,11 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { do { // send over good links std::vector good_link; - for (size_t i = 0; i < links.size(); ++i) { - if (!links[i].sock.BadSocket()) { - good_link.push_back(static_cast(links[i].rank)); + for (size_t i = 0; i < all_links.size(); ++i) { + if (!all_links[i].sock.BadSocket()) { + good_link.push_back(static_cast(all_links[i].rank)); } else { - if (!links[i].sock.IsClosed()) links[i].sock.Close(); + if (!all_links[i].sock.IsClosed()) all_links[i].sock.Close(); } } int ngood = static_cast(good_link.size()); @@ -178,13 +196,13 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { 
utils::Assert(r.sock.RecvAll(&r.rank, sizeof(r.rank)) == sizeof(r.rank), "ReConnectLink failure 13"); utils::Check(hrank == r.rank, "ReConnectLink failure, link rank inconsistent"); bool match = false; - for (size_t i = 0; i < links.size(); ++i) { - if (links[i].rank == hrank) { - utils::Assert(links[i].sock.IsClosed(), "Override a link that is active"); - links[i].sock = r.sock; match = true; break; + for (size_t i = 0; i < all_links.size(); ++i) { + if (all_links[i].rank == hrank) { + utils::Assert(all_links[i].sock.IsClosed(), "Override a link that is active"); + all_links[i].sock = r.sock; match = true; break; } } - if (!match) links.push_back(r); + if (!match) all_links.push_back(r); } utils::Assert(tracker.SendAll(&num_error, sizeof(num_error)) == sizeof(num_error), "ReConnectLink failure 14"); } while (num_error != 0); @@ -199,27 +217,35 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { utils::Assert(r.sock.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 15"); utils::Assert(r.sock.RecvAll(&r.rank, sizeof(r.rank)) == sizeof(r.rank), "ReConnectLink failure 15"); bool match = false; - for (size_t i = 0; i < links.size(); ++i) { - if (links[i].rank == r.rank) { - utils::Assert(links[i].sock.IsClosed(), "Override a link that is active"); - links[i].sock = r.sock; match = true; break; + for (size_t i = 0; i < all_links.size(); ++i) { + if (all_links[i].rank == r.rank) { + utils::Assert(all_links[i].sock.IsClosed(), "Override a link that is active"); + all_links[i].sock = r.sock; match = true; break; } } - if (!match) links.push_back(r); + if (!match) all_links.push_back(r); } // close listening sockets sock_listen.Close(); this->parent_index = -1; - // setup selecter - for (size_t i = 0; i < links.size(); ++i) { - utils::Assert(!links[i].sock.BadSocket(), "ReConnectLink: bad socket"); + // setup tree links and ring structure + tree_links.plinks.clear(); + for (size_t i = 0; i < all_links.size(); ++i) { + 
utils::Assert(!all_links[i].sock.BadSocket(), "ReConnectLink: bad socket"); // set the socket to non-blocking mode - links[i].sock.SetNonBlock(true); - if (links[i].rank == parent_rank) parent_index = static_cast(i); - } - if (parent_rank != -1) { - utils::Assert(parent_index != -1, "cannot find parent in the link"); + all_links[i].sock.SetNonBlock(true); + if (tree_neighbors.count(all_links[i].rank) != 0) { + if (all_links[i].rank == parent_rank) { + parent_index = static_cast(tree_links.plinks.size()); + } + tree_links.plinks.push_back(&all_links[i]); + } + if (all_links[i].rank == prev_rank) ring_prev = &all_links[i]; + if (all_links[i].rank == next_rank) ring_next = &all_links[i]; } + utils::Assert(parent_rank == -1 || parent_index != -1, "cannot find parent in the link"); + utils::Assert(prev_rank == -1 || ring_prev != NULL, "cannot find prev ring in the link"); + utils::Assert(next_rank == -1 || ring_next != NULL, "cannot find next ring in the link"); } /*! * \brief perform in-place allreduce, on sendrecvbuf, this function can fail, and will return the cause of failure @@ -241,6 +267,7 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, ReduceFunction reducer) { + RefLinkVector &links = tree_links; if (links.size() == 0 || count == 0) return kSuccess; // total size of message const size_t total_size = type_nbytes * count; @@ -391,8 +418,9 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, */ AllreduceBase::ReturnType AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { + RefLinkVector &links = tree_links; if (links.size() == 0 || total_size == 0) return kSuccess; - utils::Check(root < world_size, "Broadcast: root should be smaller than world size"); + utils::Check(root < world_size, "Broadcast: root should be smaller than world size"); // number of links const int nlink = static_cast(links.size()); // size of space already read from data diff --git a/src/allreduce_base.h b/src/allreduce_base.h index 
6eea948ce..4ef4a044e 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -259,6 +259,19 @@ class AllreduceBase : public IEngine { // aligned with 64 bits, will be able to perform 64 bits operations freely std::vector buffer_; }; + /*! + * \brief simple data structure that works like a vector + * but takes reference instead of space + */ + struct RefLinkVector { + std::vector plinks; + inline LinkRecord &operator[](size_t i) { + return *plinks[i]; + } + inline size_t size(void) const { + return plinks.size(); + } + }; /*! * \brief connect to the tracker to fix the the missing links * this function is also used when the engine start up @@ -306,9 +319,11 @@ class AllreduceBase : public IEngine { int parent_index; // rank of parent node, can be -1 int parent_rank; - // sockets of all links - std::vector links; - // pointer to someplace in the ring + // sockets of all links this connects to + std::vector all_links; + // all the links in the reduction tree connection + RefLinkVector tree_links; + // pointer to links in the ring LinkRecord *ring_prev, *ring_next; //----- meta information----- // unique identifier of the possible job this process is doing diff --git a/src/allreduce_robust-inl.h b/src/allreduce_robust-inl.h index f1f557593..49f8f2c37 100644 --- a/src/allreduce_robust-inl.h +++ b/src/allreduce_robust-inl.h @@ -37,6 +37,7 @@ AllreduceRobust::MsgPassing(const NodeType &node_value, const std::vector &edge_in, size_t out_index) ) { + RefLinkVector &links = tree_links; if (links.size() == 0) return kSuccess; // number of links const int nlink = static_cast(links.size()); diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index dbb318c33..c1d5119cc 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -11,13 +11,14 @@ #include #include "./io.h" #include "./utils.h" +#include "./rabit.h" #include "./allreduce_robust.h" namespace rabit { namespace engine { AllreduceRobust::AllreduceRobust(void) { result_buffer_round = 1; - 
num_local_replica = 2; + num_local_replica = 0; seq_counter = 0; } /*! \brief shutdown the engine */ @@ -131,9 +132,17 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) */ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, utils::ISerializable *local_model) { - utils::Check(local_model == NULL, "CheckPoint local_model is not yet supported"); - // check if we succesfll + if (num_local_replica == 0) { + utils::Check(local_model == NULL, "need to set num_local_replica larger than 1 to checkpoint local_model"); + } + // check if we succesful if (RecoverExec(NULL, 0, ActionSummary::kLoadCheck, ActionSummary::kSpecialOp)) { + if (local_model != NULL) { + // load in local model + utils::MemoryFixSizeBuffer fs(BeginPtr(local_chkpt[local_chkpt_version]), + local_rptr[local_chkpt_version][1]); + local_model->Load(fs); + } // reset result buffer resbuf.Clear(); seq_counter = 0; // load from buffer @@ -170,7 +179,31 @@ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, */ void AllreduceRobust::CheckPoint(const utils::ISerializable *global_model, const utils::ISerializable *local_model) { - utils::Assert(local_model == NULL, "CheckPoint local model is not supported yet"); + if (num_local_replica == 0) { + utils::Check(local_model == NULL, "need to set num_local_replica larger than 1 to checkpoint local_model"); + } + if (num_local_replica != 0) { + while (true) { + if (RecoverExec(NULL, 0, 0, ActionSummary::kLocalCheckPoint)) break; + // save model model to new version place + int new_version = !local_chkpt_version; + local_chkpt[new_version].clear(); + utils::MemoryBufferStream fs(&local_chkpt[new_version]); + if (local_model != NULL) { + local_model->Save(fs); + } + local_rptr[new_version].clear(); + local_rptr[new_version].push_back(0); + local_rptr[new_version].push_back(local_chkpt[new_version].length()); + if (CheckAndRecover(TryCheckinLocalState(&local_rptr[new_version], + 
&local_chkpt[new_version]))) break; + } + // run the ack phase + utils::Assert(RecoverExec(NULL, 0, 0, ActionSummary::kLocalCheckAck), + "check point must return true"); + // switch pointer to new version + local_chkpt_version = !local_chkpt_version; + } // execute checkpoint, note: when checkpoint existing, load will not happen utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kSpecialOp), "check point must return true"); @@ -199,32 +232,32 @@ void AllreduceRobust::CheckPoint(const utils::ISerializable *global_model, */ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { // number of links - const int nlink = static_cast(links.size()); + const int nlink = static_cast(all_links.size()); for (int i = 0; i < nlink; ++i) { - links[i].InitBuffer(sizeof(int), 1 << 10, reduce_buffer_size); - links[i].ResetSize(); + all_links[i].InitBuffer(sizeof(int), 1 << 10, reduce_buffer_size); + all_links[i].ResetSize(); } // read and discard data from all channels until pass mark while (true) { for (int i = 0; i < nlink; ++i) { - if (links[i].sock.BadSocket()) continue; - if (links[i].size_write == 0) { + if (all_links[i].sock.BadSocket()) continue; + if (all_links[i].size_write == 0) { char sig = kOOBReset; - ssize_t len = links[i].sock.Send(&sig, sizeof(sig), MSG_OOB); + ssize_t len = all_links[i].sock.Send(&sig, sizeof(sig), MSG_OOB); // error will be filtered in next loop - if (len == sizeof(sig)) links[i].size_write = 1; + if (len == sizeof(sig)) all_links[i].size_write = 1; } - if (links[i].size_write == 1) { + if (all_links[i].size_write == 1) { char sig = kResetMark; - ssize_t len = links[i].sock.Send(&sig, sizeof(sig)); - if (len == sizeof(sig)) links[i].size_write = 2; + ssize_t len = all_links[i].sock.Send(&sig, sizeof(sig)); + if (len == sizeof(sig)) all_links[i].size_write = 2; } } utils::SelectHelper rsel; bool finished = true; for (int i = 0; i < nlink; ++i) { - if (links[i].size_write != 2 && !links[i].sock.BadSocket()) { - 
rsel.WatchWrite(links[i].sock); finished = false; + if (all_links[i].size_write != 2 && !all_links[i].sock.BadSocket()) { + rsel.WatchWrite(all_links[i].sock); finished = false; } } if (finished) break; @@ -232,32 +265,32 @@ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { rsel.Select(); } for (int i = 0; i < nlink; ++i) { - if (!links[i].sock.BadSocket()) { - utils::SelectHelper::WaitExcept(links[i].sock); + if (!all_links[i].sock.BadSocket()) { + utils::SelectHelper::WaitExcept(all_links[i].sock); } } while (true) { for (int i = 0; i < nlink; ++i) { - if (links[i].size_read == 0) { - int atmark = links[i].sock.AtMark(); + if (all_links[i].size_read == 0) { + int atmark = all_links[i].sock.AtMark(); if (atmark < 0) { - utils::Assert(links[i].sock.BadSocket(), "must already gone bad"); + utils::Assert(all_links[i].sock.BadSocket(), "must already gone bad"); } else if (atmark > 0) { - links[i].size_read = 1; + all_links[i].size_read = 1; } else { // no at mark, read and discard data - ssize_t len = links[i].sock.Recv(links[i].buffer_head, links[i].buffer_size); - if (links[i].sock.AtMark()) links[i].size_read = 1; + ssize_t len = all_links[i].sock.Recv(all_links[i].buffer_head, all_links[i].buffer_size); + if (all_links[i].sock.AtMark()) all_links[i].size_read = 1; // zero length, remote closed the connection, close socket - if (len == 0) links[i].sock.Close(); + if (len == 0) all_links[i].sock.Close(); } } } utils::SelectHelper rsel; bool finished = true; for (int i = 0; i < nlink; ++i) { - if (links[i].size_read == 0 && !links[i].sock.BadSocket()) { - rsel.WatchRead(links[i].sock); finished = false; + if (all_links[i].size_read == 0 && !all_links[i].sock.BadSocket()) { + rsel.WatchRead(all_links[i].sock); finished = false; } } if (finished) break; @@ -266,22 +299,22 @@ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { // start synchronization, use blocking I/O to avoid select for (int i = 0; i < nlink; ++i) { - if 
(!links[i].sock.BadSocket()) { + if (!all_links[i].sock.BadSocket()) { char oob_mark; - links[i].sock.SetNonBlock(false); - ssize_t len = links[i].sock.Recv(&oob_mark, sizeof(oob_mark), MSG_WAITALL); + all_links[i].sock.SetNonBlock(false); + ssize_t len = all_links[i].sock.Recv(&oob_mark, sizeof(oob_mark), MSG_WAITALL); if (len == 0) { - links[i].sock.Close(); continue; + all_links[i].sock.Close(); continue; } else if (len > 0) { utils::Assert(oob_mark == kResetMark, "wrong oob msg"); - utils::Assert(links[i].sock.AtMark() != 1, "should already read past mark"); + utils::Assert(all_links[i].sock.AtMark() != 1, "should already read past mark"); } else { utils::Assert(errno != EAGAIN|| errno != EWOULDBLOCK, "BUG"); } // send out ack char ack = kResetAck; while (true) { - len = links[i].sock.Send(&ack, sizeof(ack)); + len = all_links[i].sock.Send(&ack, sizeof(ack)); if (len == sizeof(ack)) break; if (len == -1) { if (errno != EAGAIN && errno != EWOULDBLOCK) break; @@ -291,22 +324,22 @@ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { } // wait all ack for (int i = 0; i < nlink; ++i) { - if (!links[i].sock.BadSocket()) { + if (!all_links[i].sock.BadSocket()) { char ack; - ssize_t len = links[i].sock.Recv(&ack, sizeof(ack), MSG_WAITALL); + ssize_t len = all_links[i].sock.Recv(&ack, sizeof(ack), MSG_WAITALL); if (len == 0) { - links[i].sock.Close(); continue; + all_links[i].sock.Close(); continue; } else if (len > 0) { utils::Assert(ack == kResetAck, "wrong Ack MSG"); } else { utils::Assert(errno != EAGAIN|| errno != EWOULDBLOCK, "BUG"); } // set back to nonblock mode - links[i].sock.SetNonBlock(true); + all_links[i].sock.SetNonBlock(true); } } for (int i = 0; i < nlink; ++i) { - if (links[i].sock.BadSocket()) return kSockError; + if (all_links[i].sock.BadSocket()) return kSockError; } return kSuccess; } @@ -320,8 +353,8 @@ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { bool AllreduceRobust::CheckAndRecover(ReturnType err_type) { if 
(err_type == kSuccess) return true; // simple way, shutdown all links - for (size_t i = 0; i < links.size(); ++i) { - if (!links[i].sock.BadSocket()) links[i].sock.Close(); + for (size_t i = 0; i < all_links.size(); ++i) { + if (!all_links[i].sock.BadSocket()) all_links[i].sock.Close(); } ReConnectLinks("recover"); return false; @@ -479,6 +512,7 @@ AllreduceRobust::TryRecoverData(RecoverType role, size_t size, int recv_link, const std::vector &req_in) { + RefLinkVector &links = tree_links; // no need to run recovery for zero size message if (links.size() == 0 || size == 0) return kSuccess; utils::Assert(req_in.size() == links.size(), "TryRecoverData"); @@ -580,17 +614,48 @@ AllreduceRobust::TryRecoverData(RecoverType role, * \sa ReturnType */ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) { - RecoverType role = requester ? kRequestData : kHaveData; + // check in local data + RecoverType role = requester ? kRequestData : kHaveData; + ReturnType succ; + if (num_local_replica != 0) { + if (requester) { + // clear existing history, if any, before load + local_rptr[local_chkpt_version].clear(); + local_chkpt[local_chkpt_version].clear(); + } + // recover local checkpoint + succ = TryRecoverLocalState(&local_rptr[local_chkpt_version], + &local_chkpt[local_chkpt_version]); + if (succ != kSuccess) return succ; + int nlocal = std::max(static_cast(local_rptr[local_chkpt_version].size()) - 1, 0); + // check if everyone is OK + unsigned state = 0; + if (nlocal == num_local_replica + 1) { + // complete recovery + state = 1; + } else if (nlocal == 0) { + // get nothing + state = 2; + } else { + // partially complete state + state = 4; + } + succ = TryAllreduce(&state, sizeof(state), 1, op::Reducer); + if (succ != kSuccess) return succ; + utils::Check(state == 1 || state == 2, + "LoadCheckPoint: too many nodes fails, cannot recover local state"); + } + // recover global checkpoint size_t size = this->global_checkpoint.length(); int recv_link; 
std::vector req_in; - ReturnType succ = TryDecideRouting(role, &size, &recv_link, &req_in); + succ = TryDecideRouting(role, &size, &recv_link, &req_in); if (succ != kSuccess) return succ; if (role == kRequestData) { global_checkpoint.resize(size); } if (size == 0) return kSuccess; - return TryRecoverData(role, &global_checkpoint[0], size, recv_link, req_in); + return TryRecoverData(role, BeginPtr(global_checkpoint), size, recv_link, req_in); } /*! * \brief try to get the result of operation specified by seqno @@ -607,11 +672,21 @@ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) { * \sa ReturnType */ AllreduceRobust::ReturnType -AllreduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { RecoverType role; +AllreduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { // if minimum sequence requested is local check point ack, // this means all nodes have finished local check point, directly return if (seqno == ActionSummary::kLocalCheckAck) return kSuccess; - + if (seqno == ActionSummary::kLocalCheckPoint) { + // new version of local model + int new_version = !local_chkpt_version; + int nlocal = std::max(static_cast(local_rptr[new_version].size()) - 1, 0); + // if we goes to this place, use must have already setup the state once + utils::Assert(nlocal == 1 || nlocal == num_local_replica + 1, + "TryGetResult::Checkpoint"); + return TryRecoverLocalState(&local_rptr[new_version], &local_chkpt[new_version]); + } + // handles normal data recovery + RecoverType role; if (!requester) { sendrecvbuf = resbuf.Query(seqno, &size); role = sendrecvbuf != NULL ? 
kHaveData : kPassData; @@ -786,7 +861,7 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, } chkpt.resize(rptr.back()); // pass data through the link - succ = RingPassing(&chkpt[0], rptr[nlocal], rptr[nread_end], + succ = RingPassing(BeginPtr(chkpt), rptr[nlocal], rptr[nread_end], rptr[nwrite_start], rptr[nread_end], ring_next, ring_prev); if (succ != kSuccess) { @@ -849,7 +924,7 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, } chkpt.resize(rptr.back()); // pass data through the link - succ = RingPassing(&chkpt[0], rptr[nlocal], rptr[nread_end], + succ = RingPassing(BeginPtr(chkpt), rptr[nlocal], rptr[nread_end], rptr[nwrite_start], rptr[nwrite_end], ring_prev, ring_next); if (succ != kSuccess) { @@ -858,6 +933,57 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, } return kSuccess; } +/*! + * \brief try to checkpoint local state, this function is called in normal executation phase + * of checkpoint that contains local state + * the input state must exactly one saved state(local state of current node), + * after complete, this function will get local state from previous num_local_replica nodes and put them + * into local_chkpt and local_rptr + * + * It is also OK to call TryRecoverLocalState instead, + * TryRecoverLocalState makes less assumption about the input, and requires more communications + * + * \param p_local_rptr the pointer to the segment pointers in the states array + * \param p_local_chkpt the pointer to the storage of local check points + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType, TryRecoverLocalState + */ +AllreduceRobust::ReturnType +AllreduceRobust::TryCheckinLocalState(std::vector *p_local_rptr, + std::string *p_local_chkpt) { + // if there is no local replica, we can do nothing + if (num_local_replica == 0) return kSuccess; + std::vector &rptr = *p_local_rptr; + std::string &chkpt = *p_local_chkpt; + 
utils::Assert(rptr.size() == 2, "TryCheckinLocalState must have exactly 1 state"); + const int n = num_local_replica; + std::vector sizes(n + 1); + sizes[0] = rptr[1] - rptr[0]; + ReturnType succ; + // pass size through the link + succ = RingPassing(BeginPtr(sizes), + 1 * sizeof(size_t), + (n + 1) * sizeof(size_t), + 0 * sizeof(size_t), + n * sizeof(size_t), + ring_prev, ring_next); + if (succ != kSuccess) return succ; + // update rptr + rptr.resize(n + 1); + for (int i = 1; i < n; ++i) { + rptr[i + 1] = rptr[i] + sizes[i]; + } + chkpt.resize(rptr.back()); + // pass data through the link + succ = RingPassing(BeginPtr(chkpt), + rptr[1], rptr[n + 1], + rptr[0], rptr[n], + ring_prev, ring_next); + if (succ != kSuccess) { + rptr.resize(2); chkpt.resize(rptr.back()); return succ; + } + return kSuccess; +} /*! * \brief perform a ring passing to receive data from prev link, and sent data to next link * this allows data to stream over a ring structure @@ -883,7 +1009,7 @@ AllreduceRobust::RingPassing(void *sendrecvbuf_, size_t write_end, LinkRecord *read_link, LinkRecord *write_link) { - if (links.size() == 0 || read_end == 0) return kSuccess; + if (read_link == NULL || write_link == NULL || read_end == 0) return kSuccess; utils::Assert(read_end <= write_end, "boundary check"); utils::Assert(read_ptr <= read_end, "boundary check"); utils::Assert(write_ptr <= write_end, "boundary check"); diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index 570960e52..e43e9ac66 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -372,6 +372,23 @@ class AllreduceRobust : public AllreduceBase { */ ReturnType TryRecoverLocalState(std::vector *p_local_rptr, std::string *p_local_chkpt); + /*! 
+ * \brief try to checkpoint local state, this function is called in normal executation phase + * of checkpoint that contains local state +o * the input state must exactly one saved state(local state of current node), + * after complete, this function will get local state from previous num_local_replica nodes and put them + * into local_chkpt and local_rptr + * + * It is also OK to call TryRecoverLocalState instead, + * TryRecoverLocalState makes less assumption about the input, and requires more communications + * + * \param p_local_rptr the pointer to the segment pointers in the states array + * \param p_local_chkpt the pointer to the storage of local check points + * \return this function can return kSuccess/kSockError/kGetExcept, see ReturnType for details + * \sa ReturnType, TryRecoverLocalState + */ + ReturnType TryCheckinLocalState(std::vector *p_local_rptr, + std::string *p_local_chkpt); /*! * \brief perform a ring passing to receive data from prev link, and sent data to next link * this allows data to stream over a ring structure @@ -441,7 +458,7 @@ class AllreduceRobust : public AllreduceBase { // local_model[rptr[k]:rptr[k+1]] stores the model of node in previous k hops in the ring std::vector local_rptr[2]; // storage for local model replicas - std::string local_checkpoint[2]; + std::string local_chkpt[2]; // version of local checkpoint can be 1 or 0 int local_chkpt_version; }; diff --git a/src/rabit_tracker.py b/src/rabit_tracker.py index ceda6347f..fe01a87da 100644 --- a/src/rabit_tracker.py +++ b/src/rabit_tracker.py @@ -63,25 +63,32 @@ class SlaveEntry: return job_map[self.jobid] return -1 - def get_neighbor(self, rank, nslave): - rank = rank + 1 - ret = [] - if rank > 1: - ret.append(rank / 2 - 1) - if rank * 2 - 1 < nslave: - ret.append(rank * 2 - 1) - if rank * 2 < nslave: - ret.append(rank * 2) - return set(ret) - - def assign_rank(self, rank, wait_conn, nslave): + def assign_rank(self, rank, wait_conn, tree_map, parent_map, ring_map): self.rank 
= rank - nnset = self.get_neighbor(rank, nslave) + nnset = set(tree_map[rank]) + rprev, rnext = ring_map[rank] self.sock.sendint(rank) # send parent rank - self.sock.sendint((rank + 1) / 2 - 1) + self.sock.sendint(parent_map[rank]) # send world size - self.sock.sendint(nslave) + self.sock.sendint(len(tree_map)) + self.sock.sendint(len(nnset)) + # send the rprev and next link + for r in nnset: + self.sock.sendint(r) + # send prev link + if rprev != -1 and rprev != rank: + nnset.add(rprev) + self.sock.sendint(rprev) + else: + self.sock.sendint(-1) + # send next link + if rnext != -1 and rnext != rank: + nnset.add(rnext) + self.sock.sendint(rnext) + else: + self.sock.sendint(-1) + while True: ngood = self.sock.recvint() goodset = set([]) @@ -131,8 +138,35 @@ class Tracker: self.sock.close() def slave_args(self): return ['rabit_tracker_uri=%s' % socket.gethostname(), - 'rabit_tracker_port=%s' % self.port] + 'rabit_tracker_port=%s' % self.port] + def get_neighbor(self, rank, nslave): + rank = rank + 1 + ret = [] + if rank > 1: + ret.append(rank / 2 - 1) + if rank * 2 - 1 < nslave: + ret.append(rank * 2 - 1) + if rank * 2 < nslave: + ret.append(rank * 2) + return ret + def get_tree(self, nslave): + tree_map = {} + parent_map = {} + for r in range(nslave): + tree_map[r] = self.get_neighbor(r, nslave) + parent_map[r] = (r + 1) / 2 - 1 + return tree_map, parent_map + def get_ring(self, tree_map, parent_map): + ring_map = {} + nslave = len(tree_map) + for r in range(nslave): + rprev = (r + nslave - 1) % nslave + rnext = (r + 1) % nslave + ring_map[r] = (rprev, rnext) + return ring_map def accept_slaves(self, nslave): + tree_map, parent_map = self.get_tree(nslave) + ring_map = self.get_ring(tree_map, parent_map) # set of nodes that finishs the job shutdown = {} # set of nodes that is waiting for connections @@ -163,7 +197,7 @@ class Tracker: rank = todo_nodes.pop(0) if s.jobid != 'NULL': job_map[s.jobid] = rank - s.assign_rank(rank, wait_conn, nslave) + s.assign_rank(rank, 
wait_conn, tree_map, parent_map, ring_map) if s.wait_accept > 0: wait_conn[rank] = s print 'All nodes finishes job' diff --git a/src/socket.h b/src/socket.h index eba1b89f8..65516690d 100644 --- a/src/socket.h +++ b/src/socket.h @@ -153,7 +153,8 @@ class Socket { * \param end_port ending port number to try * \return the port successfully bind to, return -1 if failed to bind any port */ - inline int TryBindHost(int start_port, int end_port) { + inline int TryBindHost(int start_port, int end_port) { + // TODO, add prefix check for (int port = start_port; port < end_port; ++port) { SockAddr addr("0.0.0.0", port); if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == 0) { diff --git a/src/utils.h b/src/utils.h index d09667d89..e1b34fe2e 100644 --- a/src/utils.h +++ b/src/utils.h @@ -187,5 +187,13 @@ inline const T *BeginPtr(const std::vector &vec) { return &vec[0]; } } +inline char* BeginPtr(std::string &str) { + if (str.length() == 0) return NULL; + return &str[0]; +} +inline const char* BeginPtr(const std::string &str) { + if (str.length() == 0) return NULL; + return &str[0]; +} } // namespace rabit #endif // RABIT_UTILS_H_ diff --git a/submit_mpi.py b/submit_mpi.py index 3a65ec440..468604317 100755 --- a/submit_mpi.py +++ b/submit_mpi.py @@ -24,7 +24,10 @@ def mpi_submit(nslave, args): args arguments to launch each job this usually includes the parameters of master_uri and parameters passed into submit """ - cmd = ' '.join(['mpirun -n %d --hostfile %s' % (nslave, args[0])] + args[1:]) + if args[0] == 'local': + cmd = ' '.join(['mpirun -n %d' % (nslave)] + args[1:]) + else: + cmd = ' '.join(['mpirun -n %d --hostfile %s' % (nslave, args[0])] + args[1:]) print cmd subprocess.check_call(cmd, shell = True) From 8d570b54c78fd6d9bb2282277ec16c89f35c4715 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 7 Dec 2014 16:22:02 -0800 Subject: [PATCH 107/531] add code to help link reuse, start test numreplica --- src/rabit_tracker.py | 29 ++++++++++++++++++++++++++--- 1 
file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/rabit_tracker.py b/src/rabit_tracker.py index fe01a87da..c7068e31d 100644 --- a/src/rabit_tracker.py +++ b/src/rabit_tracker.py @@ -88,7 +88,6 @@ class SlaveEntry: self.sock.sendint(rnext) else: self.sock.sendint(-1) - while True: ngood = self.sock.recvint() goodset = set([]) @@ -156,13 +155,37 @@ class Tracker: tree_map[r] = self.get_neighbor(r, nslave) parent_map[r] = (r + 1) / 2 - 1 return tree_map, parent_map + def find_share_ring(self, tree_map, parent_map, r): + """ + get a ring structure that tends to share nodes with the tree + return a list starting from r + """ + nset = set(tree_map[r]) + cset = nset - set([parent_map[r]]) + if len(cset) == 0: + return [r] + rlst = [r] + cnt = 0 + for v in cset: + vlst = self.find_share_ring(tree_map, parent_map, v) + cnt += 1 + if cnt == len(cset): + vlst.reverse() + rlst += vlst + return rlst def get_ring(self, tree_map, parent_map): + """ + get a ring connection used to recover local data + """ + assert parent_map[0] == -1 + rlst = self.find_share_ring(tree_map, parent_map, 0) + assert len(rlst) == len(tree_map) ring_map = {} - nslave = len(tree_map) + nslave = len(tree_map) for r in range(nslave): rprev = (r + nslave - 1) % nslave rnext = (r + 1) % nslave - ring_map[r] = (rprev, rnext) + ring_map[rlst[r]] = (rlst[rprev], rlst[rnext]) return ring_map def accept_slaves(self, nslave): tree_map, parent_map = self.get_tree(nslave) From b38fa40fa6462e266f3a165c2ef9817b905f505b Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 7 Dec 2014 20:25:42 -0800 Subject: [PATCH 108/531] fix ring passing --- src/allreduce_robust.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index c1d5119cc..3d2f128f2 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -45,6 +45,9 @@ void AllreduceRobust::SetParam(const char *name, const char *val) { if (!strcmp(name, "result_replicate")) { 
result_buffer_round = std::max(world_size / atoi(val), 1); } + if (!strcmp(name, "num_local_replica")) { + num_local_replica = atoi(val); + } } /*! * \brief perform in-place allreduce, on sendrecvbuf @@ -1010,9 +1013,9 @@ AllreduceRobust::RingPassing(void *sendrecvbuf_, LinkRecord *read_link, LinkRecord *write_link) { if (read_link == NULL || write_link == NULL || read_end == 0) return kSuccess; - utils::Assert(read_end <= write_end, "boundary check"); - utils::Assert(read_ptr <= read_end, "boundary check"); - utils::Assert(write_ptr <= write_end, "boundary check"); + utils::Assert(write_end <= read_end, "RingPassing: boundary check1"); + utils::Assert(read_ptr <= read_end, "RingPassing: boundary check2"); + utils::Assert(write_ptr <= write_end, "RingPassing: boundary check3"); // take reference LinkRecord &prev = *read_link, &next = *write_link; // send recv buffer From 2750679270219102c51a2da8f22bbe171bf40d4a Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 7 Dec 2014 20:57:29 -0800 Subject: [PATCH 109/531] normal state running ok --- src/allreduce_robust.cc | 17 ++-- src/mock.h | 12 +-- src/rabit-inl.h | 10 ++- src/rabit.h | 26 ++++-- test/Makefile | 6 +- test/keepalive.sh | 4 +- test/test_local_recover.cpp | 154 ++++++++++++++++++++++++++++++++++++ test/test_model_recover.cpp | 2 +- toolkit/kmeans.cpp | 2 +- 9 files changed, 203 insertions(+), 30 deletions(-) create mode 100644 test/test_local_recover.cpp diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 3d2f128f2..1de92b7d6 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -26,12 +26,12 @@ void AllreduceRobust::Shutdown(void) { // need to sync the exec before we shutdown, do a pesudo check point // execute checkpoint, note: when checkpoint existing, load will not happen utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kSpecialOp), - "check point must return true"); + "Shutdown: check point must return true"); // reset result buffer 
resbuf.Clear(); seq_counter = 0; // execute check ack step, load happens here utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp), - "check ack must return true"); + "Shutdown: check ack must return true"); AllreduceBase::Shutdown(); } /*! @@ -201,9 +201,8 @@ void AllreduceRobust::CheckPoint(const utils::ISerializable *global_model, if (CheckAndRecover(TryCheckinLocalState(&local_rptr[new_version], &local_chkpt[new_version]))) break; } - // run the ack phase - utils::Assert(RecoverExec(NULL, 0, 0, ActionSummary::kLocalCheckAck), - "check point must return true"); + // run the ack phase, can be true or false + RecoverExec(NULL, 0, 0, ActionSummary::kLocalCheckAck); // switch pointer to new version local_chkpt_version = !local_chkpt_version; } @@ -678,7 +677,7 @@ AllreduceRobust::ReturnType AllreduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool requester) { // if minimum sequence requested is local check point ack, // this means all nodes have finished local check point, directly return - if (seqno == ActionSummary::kLocalCheckAck) return kSuccess; + if (seqno == ActionSummary::kLocalCheckAck) return kSuccess; if (seqno == ActionSummary::kLocalCheckPoint) { // new version of local model int new_version = !local_chkpt_version; @@ -972,8 +971,8 @@ AllreduceRobust::TryCheckinLocalState(std::vector *p_local_rptr, ring_prev, ring_next); if (succ != kSuccess) return succ; // update rptr - rptr.resize(n + 1); - for (int i = 1; i < n; ++i) { + rptr.resize(n + 2); + for (int i = 1; i <= n; ++i) { rptr[i + 1] = rptr[i] + sizes[i]; } chkpt.resize(rptr.back()); @@ -1013,7 +1012,7 @@ AllreduceRobust::RingPassing(void *sendrecvbuf_, LinkRecord *read_link, LinkRecord *write_link) { if (read_link == NULL || write_link == NULL || read_end == 0) return kSuccess; - utils::Assert(write_end <= read_end, "RingPassing: boundary check1"); + utils::Assert(write_end <= read_end, "RingPassing: boundary check1, write_end=%lu, 
read_end=%lu", write_end, read_end); utils::Assert(read_ptr <= read_end, "RingPassing: boundary check2"); utils::Assert(write_ptr <= write_end, "RingPassing: boundary check3"); // take reference diff --git a/src/mock.h b/src/mock.h index 31c93d113..e5a4c283a 100644 --- a/src/mock.h +++ b/src/mock.h @@ -30,14 +30,16 @@ public: rabit::Allreduce(sendrecvbuf, count); } - inline bool LoadCheckPoint(utils::ISerializable *p_model) { +inline int LoadCheckPoint(utils::ISerializable *global_model, + utils::ISerializable *local_model) { utils::Assert(verify(loadCheckpoint), "[%d] error when loading checkpoint", rank); - return rabit::LoadCheckPoint(p_model); + return rabit::LoadCheckPoint(global_model, local_model); } - - inline void CheckPoint(const utils::ISerializable &model) { + + inline void CheckPoint(const utils::ISerializable *global_model, + const utils::ISerializable *local_model) { utils::Assert(verify(checkpoint), "[%d] error when checkpointing", rank); - rabit::CheckPoint(model); + rabit::CheckPoint(global_model, local_model); } inline void Broadcast(std::string *sendrecv_data, int root) { diff --git a/src/rabit-inl.h b/src/rabit-inl.h index b13ea88fc..95a2eb8fd 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -124,12 +124,14 @@ inline void Allreduce(DType *sendrecvbuf, size_t count) { engine::mpi::GetType(), OP::kType); } // load latest check point -inline int LoadCheckPoint(utils::ISerializable *p_model) { - return engine::GetEngine()->LoadCheckPoint(p_model); +inline int LoadCheckPoint(utils::ISerializable *global_model, + utils::ISerializable *local_model) { + return engine::GetEngine()->LoadCheckPoint(global_model, local_model); } // checkpoint the model, meaning we finished a stage of execution -inline void CheckPoint(const utils::ISerializable &model) { - engine::GetEngine()->CheckPoint(&model); +inline void CheckPoint(const utils::ISerializable *global_model, + const utils::ISerializable *local_model) { + engine::GetEngine()->CheckPoint(global_model, 
local_model); } // return the version number of currently stored model inline int VersionNumber(void) { diff --git a/src/rabit.h b/src/rabit.h index 68e39f3fa..f19792442 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -84,7 +84,12 @@ template inline void Allreduce(DType *sendrecvbuf, size_t count); /*! * \brief load latest check point - * \param p_model pointer to the model + * \param global_model pointer to the globally shared model/state + * when calling this function, the caller need to gauranttees that global_model + * is the same in all nodes + * \param local_model pointer to local model, that is specific to current node/rank + * this can be NULL when no local model is needed + * * \return the version number of check point loaded * if returned version == 0, this means no model has been CheckPointed * the p_model is not touched, user should do necessary initialization by themselves @@ -99,15 +104,24 @@ inline void Allreduce(DType *sendrecvbuf, size_t count); * * \sa CheckPoint, VersionNumber */ -inline int LoadCheckPoint(utils::ISerializable *p_model); +inline int LoadCheckPoint(utils::ISerializable *global_model, + utils::ISerializable *local_model = NULL); /*! * \brief checkpoint the model, meaning we finished a stage of execution * every time we call check point, there is a version number which will increase by one * - * \param p_model pointer to the model - * \sa LoadCheckPoint, VersionNumber - */ -inline void CheckPoint(const utils::ISerializable &model); + * \param global_model pointer to the globally shared model/state + * when calling this function, the caller need to gauranttees that global_model + * is the same in all nodes + * \param local_model pointer to local model, that is specific to current node/rank + * this can be NULL when no local state is needed + * NOTE: local_model requires explicit replication of the model for fault-tolerance, which will + * bring replication cost in CheckPoint function. global_model do not need explicit replication. 
+ * So only CheckPoint with global_model if possible + * \sa LoadCheckPoint, VersionNumber + */ +inline void CheckPoint(const utils::ISerializable *global_model, + const utils::ISerializable *local_model = NULL); /*! * \return version number of current stored model, * which means how many calls to CheckPoint we made so far diff --git a/test/Makefile b/test/Makefile index f10229a1f..9f742be74 100644 --- a/test/Makefile +++ b/test/Makefile @@ -5,12 +5,12 @@ export LDFLAGS= -pthread -lm -lrt export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src # specify tensor path -BIN = test_allreduce test_recover test_model_recover speed_test +BIN = test_allreduce test_recover test_model_recover speed_test test_local_recover # objectives that makes up rabit library RABIT_OBJ = allreduce_base.o allreduce_robust.o engine.o MPIOBJ = engine_mpi.o -OBJ = $(RABIT_OBJ) test_allreduce.o test_recover.o test_model_recover.o speed_test.o +OBJ = $(RABIT_OBJ) test_allreduce.o test_recover.o test_model_recover.o speed_test.o test_local_recover.o MPIBIN = test_allreduce.mpi speed_test.mpi .PHONY: clean all @@ -24,6 +24,7 @@ test_allreduce.o: test_allreduce.cpp ../src/*.h speed_test.o: speed_test.cpp ../src/*.h test_recover.o: test_recover.cpp ../src/*.h test_model_recover.o: test_model_recover.cpp ../src/*.h +test_local_recover.o: test_local_recover.cpp ../src/*.h # we can link against MPI version to get use MPI test_allreduce: test_allreduce.o $(RABIT_OBJ) @@ -32,6 +33,7 @@ speed_test: speed_test.o $(RABIT_OBJ) speed_test.mpi: speed_test.o $(MPIOBJ) test_recover: test_recover.o $(RABIT_OBJ) test_model_recover: test_model_recover.o $(RABIT_OBJ) +test_local_recover: test_local_recover.o $(RABIT_OBJ) $(BIN) : $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) diff --git a/test/keepalive.sh b/test/keepalive.sh index e72a2bba9..ddfc5d618 100755 --- a/test/keepalive.sh +++ b/test/keepalive.sh @@ -6,8 +6,8 @@ then exit -1 fi nrep=0 -echo ./$@ job_id=$OMPI_COMM_WORLD_RANK 
-until ./$@ job_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do +echo ./$@ task_id=$OMPI_COMM_WORLD_RANK +until ./$@ task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do sleep 1 nrep=$((nrep+1)) echo ./$@ job_id=$OMPI_COMM_WORLD_RANK repeat=$nrep diff --git a/test/test_local_recover.cpp b/test/test_local_recover.cpp new file mode 100644 index 000000000..87262ba7b --- /dev/null +++ b/test/test_local_recover.cpp @@ -0,0 +1,154 @@ +// this is a test case to test whether rabit can recover model when +// facing an exception +#include +#include +#include +#include +#include +#include + +using namespace rabit; + +struct MockException { +}; + +// dummy model +class Model : public rabit::utils::ISerializable { + public: + // iterations + std::vector data; + // load from stream + virtual void Load(rabit::utils::IStream &fi) { + fi.Read(&data); + } + /*! \brief save the model to the stream */ + virtual void Save(rabit::utils::IStream &fo) const { + fo.Write(data); + } + virtual void InitModel(size_t n, float v) { + data.resize(n, v); + } +}; + +inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, int iter) { + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + const int z = iter + 111; + + std::vector ndata(model->data.size()); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % z + local->data[i]; + } + mock.Allreduce(&ndata[0], ndata.size()); + if (ntrial == iter && rank == 3) { + //exit(-1); + } + for (size_t i = 0; i < ndata.size(); ++i) { + float rmax = (i * 1) % z + model->data[i]; + for (int r = 0; r < nproc; ++r) { + rmax = std::max(rmax, (float)((i * (r+1)) % z) + model->data[i] + r); + } + utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank); + } + model->data = ndata; + local->data = ndata; + for (size_t i = 0; i < ndata.size(); ++i) { + local->data[i] = ndata[i] + rank; + } +} + +inline void TestSum(test::Mock &mock, Model *model, Model *local, int ntrial, int iter) { + int rank = 
rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + const int z = 131 + iter; + + std::vector ndata(model->data.size()); + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % z + local->data[i]; + } + mock.Allreduce(&ndata[0], ndata.size()); + + if (ntrial == iter && rank == 0) { + exit(-1); + } + + for (size_t i = 0; i < ndata.size(); ++i) { + float rsum = 0.0f; + for (int r = 0; r < nproc; ++r) { + rsum += (float)((i * (r+1)) % z) + model->data[i] + r; + } + utils::Check(fabsf(rsum - ndata[i]) < 1e-5 , + "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]); + } + model->data = ndata; + for (size_t i = 0; i < ndata.size(); ++i) { + local->data[i] = ndata[i] + rank; + } +} + +inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { + int rank = rabit::GetRank(); + std::string s; s.resize(n); + for (size_t i = 0; i < n; ++i) { + s[i] = char(i % 126 + 1); + } + std::string res; + if (root == rank) { + res = s; + mock.Broadcast(&res, root); + } else { + mock.Broadcast(&res, root); + } + utils::Check(res == s, "[%d] TestBcast fail", rank); +} + +int main(int argc, char *argv[]) { + if (argc < 3) { + printf("Usage: \n"); + return 0; + } + int n = atoi(argv[1]); + rabit::Init(argc, argv); + int rank = rabit::GetRank(); + int nproc = rabit::GetWorldSize(); + std::string name = rabit::GetProcessorName(); + test::Mock mock(rank, argv[2], argv[3]); + Model model, local; + srand(0); + int ntrial = 0; + for (int i = 1; i < argc; ++i) { + int n; + if (sscanf(argv[i], "repeat=%d", &n) == 1) ntrial = n; + } + while (true) { + try { + int iter = rabit::LoadCheckPoint(&model, &local); + if (iter == 0) { + model.InitModel(n, 1.0f); + local.InitModel(n, 1.0f + rank); + utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); + } else { + utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); + } + for (int r = iter; r < 3; ++r) { + TestMax(mock, &model, &local, ntrial, r); + 
utils::LogPrintf("[%d] !!!TestMax pass, iter=%d\n", rank, r); + int step = std::max(nproc / 3, 1); + for (int i = 0; i < nproc; i += step) { + TestBcast(mock, n, i, ntrial); + } + utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); + TestSum(mock, &model, &local, ntrial, r); + utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); + rabit::CheckPoint(&model, &local); + utils::LogPrintf("[%d] !!!CheckPont pass, iter=%d\n", rank, r); + } + break; + } catch (MockException &e) { + rabit::engine::GetEngine()->InitAfterException(); + ++ntrial; + } + } + rabit::Finalize(); + return 0; +} diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index 86762c671..2b72cde75 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) { utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); TestSum(mock, &model, ntrial, r); utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); - rabit::CheckPoint(model); + rabit::CheckPoint(&model); utils::LogPrintf("[%d] !!!CheckPont pass, iter=%d\n", rank, r); } break; diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index 674223cc6..e6dffd500 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -137,7 +137,7 @@ int main(int argc, char *argv[]) { } } model.Normalize(); - rabit::CheckPoint(model); + rabit::CheckPoint(&model); } // output the model file to somewhere if (rabit::GetRank() == 0) { From 39f2dcdfef1fe6481b61ae9524921d71833a07d2 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Mon, 8 Dec 2014 08:36:55 -0800 Subject: [PATCH 110/531] Update rabit_tracker.py --- src/rabit_tracker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rabit_tracker.py b/src/rabit_tracker.py index c7068e31d..910db18b1 100644 --- a/src/rabit_tracker.py +++ b/src/rabit_tracker.py @@ -211,9 +211,9 @@ class Tracker: shutdown[s.rank] = s continue assert s.cmd == 'start' or s.cmd == 'recover' - if s.cmd == 
'recover': + if s.cmd == 'recover' assert s.rank >= 0 - print 'Recieve recover signal from %d' % s.rank + print 'Recieve %s signal from %d' % (s.cmd, s.rank) rank = s.decide_rank(job_map) if rank == -1: assert len(todo_nodes) != 0 From dfb3961eea957a1b2c7f57047dc7b20bf72d289c Mon Sep 17 00:00:00 2001 From: root Date: Mon, 8 Dec 2014 17:13:42 +0000 Subject: [PATCH 111/531] changing port --- src/rabit_tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabit_tracker.py b/src/rabit_tracker.py index 910db18b1..7c5dfa1d2 100644 --- a/src/rabit_tracker.py +++ b/src/rabit_tracker.py @@ -121,7 +121,7 @@ class SlaveEntry: return rmset class Tracker: - def __init__(self, port = 9000, port_end = 9999): + def __init__(self, port = 9091, port_end = 9999): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) for port in range(port, port_end): try: From 5aff7fab29abf44ed8f1bc2f9035df763ee4fc1d Mon Sep 17 00:00:00 2001 From: root Date: Mon, 8 Dec 2014 17:15:49 +0000 Subject: [PATCH 112/531] adding : --- src/rabit_tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rabit_tracker.py b/src/rabit_tracker.py index 7c5dfa1d2..025b5938a 100644 --- a/src/rabit_tracker.py +++ b/src/rabit_tracker.py @@ -211,7 +211,7 @@ class Tracker: shutdown[s.rank] = s continue assert s.cmd == 'start' or s.cmd == 'recover' - if s.cmd == 'recover' + if s.cmd == 'recover': assert s.rank >= 0 print 'Recieve %s signal from %d' % (s.cmd, s.rank) rank = s.decide_rank(job_map) From 3f22596e3c22d53a15e05e2f6b01541a887dcc3f Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 9 Dec 2014 20:57:54 -0800 Subject: [PATCH 113/531] check in license --- LICENSE | 13 +++++++++++++ src/allreduce_robust.cc | 15 ++++++++++----- test/test_local_recover.cpp | 2 +- test/test_model_recover.cpp | 4 ++-- 4 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..ebf9611d7 --- /dev/null 
+++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright (c) 2014 by Contributors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 1de92b7d6..99906fdc6 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -20,6 +20,7 @@ AllreduceRobust::AllreduceRobust(void) { result_buffer_round = 1; num_local_replica = 0; seq_counter = 0; + local_chkpt_version = 0; } /*! \brief shutdown the engine */ void AllreduceRobust::Shutdown(void) { @@ -619,16 +620,16 @@ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) { // check in local data RecoverType role = requester ? 
kRequestData : kHaveData; ReturnType succ; - if (num_local_replica != 0) { + if (false) { if (requester) { // clear existing history, if any, before load local_rptr[local_chkpt_version].clear(); local_chkpt[local_chkpt_version].clear(); } // recover local checkpoint - succ = TryRecoverLocalState(&local_rptr[local_chkpt_version], - &local_chkpt[local_chkpt_version]); - if (succ != kSuccess) return succ; + //succ = TryRecoverLocalState(&local_rptr[local_chkpt_version], + //m&local_chkpt[local_chkpt_version]); + //if (succ != kSuccess) return succ; int nlocal = std::max(static_cast(local_rptr[local_chkpt_version].size()) - 1, 0); // check if everyone is OK unsigned state = 0; @@ -817,7 +818,8 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, utils::Assert(chkpt.length() == 0, "local chkpt space inconsistent"); } const int n = num_local_replica; - {// backward passing, passing state in backward direction of the ring + utils::LogPrintf("[%d] backward!!\n", rabit::GetRank()); + if(false){// backward passing, passing state in backward direction of the ring const int nlocal = static_cast(rptr.size() - 1); utils::Assert(nlocal <= n + 1, "invalid local replica"); std::vector msg_back(n + 1); @@ -870,6 +872,8 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ; } } + + utils::LogPrintf("[%d] FORward!!\n", rabit::GetRank()); {// forward passing, passing state in forward direction of the ring const int nlocal = static_cast(rptr.size() - 1); utils::Assert(nlocal <= n + 1, "invalid local replica"); @@ -933,6 +937,7 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ; } } + utils::LogPrintf("[%d] Finished!!\n", rabit::GetRank()); return kSuccess; } /*! 
diff --git a/test/test_local_recover.cpp b/test/test_local_recover.cpp index 87262ba7b..27d4541a4 100644 --- a/test/test_local_recover.cpp +++ b/test/test_local_recover.cpp @@ -41,7 +41,7 @@ inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, in } mock.Allreduce(&ndata[0], ndata.size()); if (ntrial == iter && rank == 3) { - //exit(-1); + throw MockException(); } for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index 2b72cde75..ae5369dc2 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -41,7 +41,7 @@ inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { } mock.Allreduce(&ndata[0], ndata.size()); if (ntrial == iter && rank == 3) { - exit(-1); + // exit(-1); } for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; @@ -65,7 +65,7 @@ inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { mock.Allreduce(&ndata[0], ndata.size()); if (ntrial == iter && rank == 0) { - exit(-1); + throw MockException(); } for (size_t i = 0; i < ndata.size(); ++i) { From 31403a41cd93d281089ed4e96e2df1cf8cd3549c Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Tue, 9 Dec 2014 21:03:41 -0800 Subject: [PATCH 114/531] Update rabit.h --- src/rabit.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rabit.h b/src/rabit.h index f19792442..c7cde6b4b 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -10,6 +10,7 @@ */ #include #include +#include "./io.h" #include "./engine.h" /*! 
\brief namespace of rabit */ From dbd05a65b50ce2f0f55c13c3502bde9d0a93ee91 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 18 Dec 2014 18:39:24 -0800 Subject: [PATCH 115/531] nice fix, start check local check --- src/allreduce_robust.cc | 35 +++++++++++++++++------------------ test/test_model_recover.cpp | 23 ++++++++++++----------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 99906fdc6..e88e9db30 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -151,18 +151,21 @@ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, resbuf.Clear(); seq_counter = 0; // load from buffer utils::MemoryBufferStream fs(&global_checkpoint); - fs.Read(&version_number, sizeof(version_number)); - if (version_number == 0) return version_number; - global_model->Load(fs); + if (global_checkpoint.length() == 0) { + version_number = 0; + } else { + utils::Assert(fs.Read(&version_number, sizeof(version_number)) != 0, "read in version number"); + global_model->Load(fs); + } // run another phase of check ack, if recovered from data utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp), "check ack must return true"); return version_number; } else { // reset result buffer - resbuf.Clear(); seq_counter = 0; + resbuf.Clear(); seq_counter = 0; version_number = 0; // nothing loaded, a fresh start, everyone init model - return false; + return version_number; } } /*! 
@@ -185,8 +188,8 @@ void AllreduceRobust::CheckPoint(const utils::ISerializable *global_model, const utils::ISerializable *local_model) { if (num_local_replica == 0) { utils::Check(local_model == NULL, "need to set num_local_replica larger than 1 to checkpoint local_model"); - } - if (num_local_replica != 0) { + } + if (num_local_replica != 0) { while (true) { if (RecoverExec(NULL, 0, 0, ActionSummary::kLocalCheckPoint)) break; // save model model to new version place @@ -516,7 +519,7 @@ AllreduceRobust::TryRecoverData(RecoverType role, int recv_link, const std::vector &req_in) { RefLinkVector &links = tree_links; - // no need to run recovery for zero size message + // no need to run recovery for zero size messages if (links.size() == 0 || size == 0) return kSuccess; utils::Assert(req_in.size() == links.size(), "TryRecoverData"); const int nlink = static_cast(links.size()); @@ -542,7 +545,7 @@ AllreduceRobust::TryRecoverData(RecoverType role, if (i == recv_link && links[i].size_read != size) { selecter.WatchRead(links[i].sock); finished = false; - } + } if (req_in[i] && links[i].size_write != size) { if (role == kHaveData || (role == kPassData && links[recv_link].size_read != links[i].size_write)) { @@ -620,16 +623,16 @@ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) { // check in local data RecoverType role = requester ? 
kRequestData : kHaveData; ReturnType succ; - if (false) { + if (num_local_replica != 0) { if (requester) { // clear existing history, if any, before load local_rptr[local_chkpt_version].clear(); local_chkpt[local_chkpt_version].clear(); } // recover local checkpoint - //succ = TryRecoverLocalState(&local_rptr[local_chkpt_version], - //m&local_chkpt[local_chkpt_version]); - //if (succ != kSuccess) return succ; + succ = TryRecoverLocalState(&local_rptr[local_chkpt_version], + &local_chkpt[local_chkpt_version]); + if (succ != kSuccess) return succ; int nlocal = std::max(static_cast(local_rptr[local_chkpt_version].size()) - 1, 0); // check if everyone is OK unsigned state = 0; @@ -818,8 +821,7 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, utils::Assert(chkpt.length() == 0, "local chkpt space inconsistent"); } const int n = num_local_replica; - utils::LogPrintf("[%d] backward!!\n", rabit::GetRank()); - if(false){// backward passing, passing state in backward direction of the ring + {// backward passing, passing state in backward direction of the ring const int nlocal = static_cast(rptr.size() - 1); utils::Assert(nlocal <= n + 1, "invalid local replica"); std::vector msg_back(n + 1); @@ -872,8 +874,6 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ; } } - - utils::LogPrintf("[%d] FORward!!\n", rabit::GetRank()); {// forward passing, passing state in forward direction of the ring const int nlocal = static_cast(rptr.size() - 1); utils::Assert(nlocal <= n + 1, "invalid local replica"); @@ -937,7 +937,6 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ; } } - utils::LogPrintf("[%d] Finished!!\n", rabit::GetRank()); return kSuccess; } /*! 
diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index ae5369dc2..17432c06e 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -26,6 +26,7 @@ class Model : public rabit::utils::ISerializable { fo.Write(data); } virtual void InitModel(size_t n) { + data.clear(); data.resize(n, 1.0f); } }; @@ -40,15 +41,15 @@ inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { ndata[i] = (i * (rank+1)) % z + model->data[i]; } mock.Allreduce(&ndata[0], ndata.size()); - if (ntrial == iter && rank == 3) { - // exit(-1); + if (ntrial == 0 && rank == 3) { + exit(-1); } for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; for (int r = 0; r < nproc; ++r) { rmax = std::max(rmax, (float)((i * (r+1)) % z) + model->data[i]); } - utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank); + utils::Check(rmax == ndata[i], "[%d] TestMax check failurem i=%lu, rmax=%f, ndata=%f", rank, i, rmax, ndata[i]); } model->data = ndata; } @@ -62,12 +63,12 @@ inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + model->data[i]; } - mock.Allreduce(&ndata[0], ndata.size()); - - if (ntrial == iter && rank == 0) { + if (iter == 0 && ntrial==0 && rank == 0) { throw MockException(); } + mock.Allreduce(&ndata[0], ndata.size()); + for (size_t i = 0; i < ndata.size(); ++i) { float rsum = model->data[i] * nproc; for (int r = 0; r < nproc; ++r) { @@ -125,11 +126,11 @@ int main(int argc, char *argv[]) { for (int r = iter; r < 3; ++r) { TestMax(mock, &model, ntrial, r); utils::LogPrintf("[%d] !!!TestMax pass, iter=%d\n", rank, r); - int step = std::max(nproc / 3, 1); - for (int i = 0; i < nproc; i += step) { - TestBcast(mock, n, i, ntrial); - } - utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); + //int step = std::max(nproc / 3, 1); + //for (int i = 0; i < nproc; i += step) { + //TestBcast(mock, n, 
i, ntrial); + //} + //utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); TestSum(mock, &model, ntrial, r); utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); rabit::CheckPoint(&model); From c8faed0b541194c7048ad05ff0b9509f702c2c00 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 18 Dec 2014 18:53:58 -0800 Subject: [PATCH 116/531] pass local model recover test --- src/allreduce_robust.cc | 17 ++++++++++++----- test/test_local_recover.cpp | 14 +++++++------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index e88e9db30..f2f75c19e 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -141,12 +141,17 @@ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, } // check if we succesful if (RecoverExec(NULL, 0, ActionSummary::kLoadCheck, ActionSummary::kSpecialOp)) { + int nlocal = std::max(static_cast(local_rptr[local_chkpt_version].size()) - 1, 0); if (local_model != NULL) { - // load in local model - utils::MemoryFixSizeBuffer fs(BeginPtr(local_chkpt[local_chkpt_version]), - local_rptr[local_chkpt_version][1]); - local_model->Load(fs); - } + if (nlocal == num_local_replica + 1) { + // load in local model + utils::MemoryFixSizeBuffer fs(BeginPtr(local_chkpt[local_chkpt_version]), + local_rptr[local_chkpt_version][1]); + local_model->Load(fs); + } else { + utils::Assert(nlocal == 0, "[%d] local model inconsistent, nlocal=%d", rank, nlocal); + } + } // reset result buffer resbuf.Clear(); seq_counter = 0; // load from buffer @@ -156,6 +161,8 @@ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, } else { utils::Assert(fs.Read(&version_number, sizeof(version_number)) != 0, "read in version number"); global_model->Load(fs); + utils::Assert(local_model == NULL || nlocal == num_local_replica + 1, + "local model inconsistent, nlocal=%d", nlocal); } // run another phase of check ack, if recovered from data 
utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp), diff --git a/test/test_local_recover.cpp b/test/test_local_recover.cpp index 27d4541a4..106e04ef9 100644 --- a/test/test_local_recover.cpp +++ b/test/test_local_recover.cpp @@ -26,6 +26,7 @@ class Model : public rabit::utils::ISerializable { fo.Write(data); } virtual void InitModel(size_t n, float v) { + data.clear(); data.resize(n, v); } }; @@ -34,13 +35,13 @@ inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, in int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); const int z = iter + 111; - + std::vector ndata(model->data.size()); for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + local->data[i]; } mock.Allreduce(&ndata[0], ndata.size()); - if (ntrial == iter && rank == 3) { + if (ntrial == iter && rank == 1) { throw MockException(); } for (size_t i = 0; i < ndata.size(); ++i) { @@ -66,11 +67,10 @@ inline void TestSum(test::Mock &mock, Model *model, Model *local, int ntrial, in for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + local->data[i]; } - mock.Allreduce(&ndata[0], ndata.size()); - if (ntrial == iter && rank == 0) { - exit(-1); + throw MockException(); } + mock.Allreduce(&ndata[0], ndata.size()); for (size_t i = 0; i < ndata.size(); ++i) { float rsum = 0.0f; @@ -135,9 +135,9 @@ int main(int argc, char *argv[]) { utils::LogPrintf("[%d] !!!TestMax pass, iter=%d\n", rank, r); int step = std::max(nproc / 3, 1); for (int i = 0; i < nproc; i += step) { - TestBcast(mock, n, i, ntrial); + //TestBcast(mock, n, i, ntrial); } - utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); + //utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); TestSum(mock, &model, &local, ntrial, r); utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); rabit::CheckPoint(&model, &local); From aa2cb38543c960d7b16cb858c306c44400029160 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 18 
Dec 2014 21:45:38 -0800 Subject: [PATCH 117/531] ResetLink still not ok --- src/allreduce_robust.cc | 34 ++++++++++++++++++---------------- src/allreduce_robust.h | 6 +++++- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index f2f75c19e..88a6dcace 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -249,7 +249,7 @@ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { for (int i = 0; i < nlink; ++i) { all_links[i].InitBuffer(sizeof(int), 1 << 10, reduce_buffer_size); all_links[i].ResetSize(); - } + } // read and discard data from all channels until pass mark while (true) { for (int i = 0; i < nlink; ++i) { @@ -283,7 +283,17 @@ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { } } while (true) { + utils::SelectHelper rsel; + bool finished = true; for (int i = 0; i < nlink; ++i) { + if (all_links[i].size_read == 0 && !all_links[i].sock.BadSocket()) { + rsel.WatchRead(all_links[i].sock); finished = false; + } + } + if (finished) break; + rsel.Select(); + for (int i = 0; i < nlink; ++i) { + if (all_links[i].sock.BadSocket()) continue; if (all_links[i].size_read == 0) { int atmark = all_links[i].sock.AtMark(); if (atmark < 0) { @@ -299,17 +309,7 @@ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { } } } - utils::SelectHelper rsel; - bool finished = true; - for (int i = 0; i < nlink; ++i) { - if (all_links[i].size_read == 0 && !all_links[i].sock.BadSocket()) { - rsel.WatchRead(all_links[i].sock); finished = false; - } - } - if (finished) break; - rsel.Select(); } - // start synchronization, use blocking I/O to avoid select for (int i = 0; i < nlink; ++i) { if (!all_links[i].sock.BadSocket()) { @@ -365,13 +365,15 @@ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { */ bool AllreduceRobust::CheckAndRecover(ReturnType err_type) { if (err_type == kSuccess) return true; - // simple way, shutdown all links - for (size_t i = 0; i < 
all_links.size(); ++i) { - if (!all_links[i].sock.BadSocket()) all_links[i].sock.Close(); + {// simple way, shutdown all links + for (size_t i = 0; i < all_links.size(); ++i) { + if (!all_links[i].sock.BadSocket()) all_links[i].sock.Close(); + } + ReConnectLinks("recover"); + return false; } - ReConnectLinks("recover"); - return false; // this was old way + // TryResetLinks still causes possible errors, so not use this one while(err_type != kSuccess) { switch(err_type) { case kGetExcept: err_type = TryResetLinks(); break; diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index e43e9ac66..92c682b12 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -95,7 +95,11 @@ class AllreduceRobust : public AllreduceBase { * this function is only used for test purpose */ virtual void InitAfterException(void) { - this->CheckAndRecover(kGetExcept); + // simple way, shutdown all links + for (size_t i = 0; i < all_links.size(); ++i) { + if (!all_links[i].sock.BadSocket()) all_links[i].sock.Close(); + } + ReConnectLinks("recover"); } private: From 58331067f828661ceed3c924f39361f3b9aeb02a Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 18 Dec 2014 23:50:59 -0800 Subject: [PATCH 118/531] cleanup testcases --- src/allreduce_base.cc | 12 ++-- src/allreduce_robust.cc | 8 +-- submit_hadoop.py | 1 - submit_mpi.py | 3 +- test/Makefile | 19 +++--- test/README.md | 18 ++++++ {src => test}/config.h | 0 test/keepalive.sh | 6 +- {src => test}/mock.h | 3 + test/speed_runner.py | 2 +- test/speed_test.cpp | 4 +- test/test.mk | 20 ++++++ test/test_allreduce.cpp | 90 -------------------------- test/test_local_recover.cpp | 68 ++++++++++++-------- test/test_model_recover.cpp | 69 ++++++++++++-------- test/test_recover.cpp | 125 ------------------------------------ test/testcase0.conf | 1 - test/testcase1.conf | 9 --- 18 files changed, 152 insertions(+), 306 deletions(-) create mode 100644 test/README.md rename {src => test}/config.h (100%) rename {src => test}/mock.h (99%) 
create mode 100644 test/test.mk delete mode 100644 test/test_allreduce.cpp delete mode 100644 test/test_recover.cpp delete mode 100644 test/testcase0.conf delete mode 100644 test/testcase1.conf diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 72fa12e79..99e12561c 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -26,7 +26,7 @@ AllreduceBase::AllreduceBase(void) { hadoop_mode = 0; version_number = 0; task_id = "NULL"; - this->SetParam("reduce_buffer", "256MB"); + this->SetParam("rabit_reduce_buffer", "256MB"); } // initialization function @@ -38,8 +38,8 @@ void AllreduceBase::Init(void) { utils::Check(task_id != NULL, "hadoop_mode is set but cannot find mapred_task_id"); } if (task_id != NULL) { - this->SetParam("task_id", task_id); - this->SetParam("hadoop_mode", "1"); + this->SetParam("rabit_task_id", task_id); + this->SetParam("rabit_hadoop_mode", "1"); } } // start socket @@ -83,9 +83,9 @@ void AllreduceBase::Shutdown(void) { void AllreduceBase::SetParam(const char *name, const char *val) { if (!strcmp(name, "rabit_tracker_uri")) tracker_uri = val; if (!strcmp(name, "rabit_tracker_port")) tracker_port = atoi(val); - if (!strcmp(name, "task_id")) task_id = val; - if (!strcmp(name, "hadoop_mode")) hadoop_mode = atoi(val); - if (!strcmp(name, "reduce_buffer")) { + if (!strcmp(name, "rabit_task_id")) task_id = val; + if (!strcmp(name, "rabit_hadoop_mode")) hadoop_mode = atoi(val); + if (!strcmp(name, "rabit_reduce_buffer")) { char unit; unsigned long amount; if (sscanf(val, "%lu%c", &amount, &unit) == 2) { diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 88a6dcace..828f57e60 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -17,10 +17,10 @@ namespace rabit { namespace engine { AllreduceRobust::AllreduceRobust(void) { - result_buffer_round = 1; num_local_replica = 0; seq_counter = 0; local_chkpt_version = 0; + result_buffer_round = 1; } /*! 
\brief shutdown the engine */ void AllreduceRobust::Shutdown(void) { @@ -42,11 +42,11 @@ void AllreduceRobust::Shutdown(void) { */ void AllreduceRobust::SetParam(const char *name, const char *val) { AllreduceBase::SetParam(name, val); - if (!strcmp(name, "result_buffer_round")) result_buffer_round = atoi(val); - if (!strcmp(name, "result_replicate")) { + if (!strcmp(name, "rabit_buffer_round")) result_buffer_round = atoi(val); + if (!strcmp(name, "rabit_global_replica")) { result_buffer_round = std::max(world_size / atoi(val), 1); } - if (!strcmp(name, "num_local_replica")) { + if (!strcmp(name, "rabit_local_replica")) { num_local_replica = atoi(val); } } diff --git a/submit_hadoop.py b/submit_hadoop.py index ae315f60a..3852b9f1d 100755 --- a/submit_hadoop.py +++ b/submit_hadoop.py @@ -17,7 +17,6 @@ parser.add_argument('-hs', '--hadoop_streaming_jar', required=True) parser.add_argument('-i', '--input', required=True) parser.add_argument('-o', '--output', required=True) parser.add_argument('-m', '--mapper', required=True) -#parser.add_argument('-r', '--reducer', required=False) parser.add_argument('-k', '--nclusters', required=True, type=int) parser.add_argument('-itr', '--iterations', required=True, type=int) args = parser.parse_args() diff --git a/submit_mpi.py b/submit_mpi.py index 468604317..3b2b68c54 100755 --- a/submit_mpi.py +++ b/submit_mpi.py @@ -1,6 +1,6 @@ #!/usr/bin/python """ -This is an example script to create a customized job submit +This is an example script to create a customized job submit with mpi script using rabit engine """ import sys @@ -34,6 +34,7 @@ def mpi_submit(nslave, args): if __name__ == '__main__': if len(sys.argv) < 2: print 'Usage: ' + print 'if == local, we will run using local mode' exit(0) # call submit, with nslave, the commands to run each job and submit function tracker.submit(int(sys.argv[1]), sys.argv[2:], fun_submit= mpi_submit) diff --git a/test/Makefile b/test/Makefile index 9f742be74..2f3b81251 100644 --- 
a/test/Makefile +++ b/test/Makefile @@ -5,33 +5,30 @@ export LDFLAGS= -pthread -lm -lrt export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src # specify tensor path -BIN = test_allreduce test_recover test_model_recover speed_test test_local_recover +BIN = speed_test test_model_recover test_local_recover # objectives that makes up rabit library RABIT_OBJ = allreduce_base.o allreduce_robust.o engine.o MPIOBJ = engine_mpi.o -OBJ = $(RABIT_OBJ) test_allreduce.o test_recover.o test_model_recover.o speed_test.o test_local_recover.o -MPIBIN = test_allreduce.mpi speed_test.mpi +OBJ = $(RABIT_OBJ) speed_test.o test_model_recover.o test_local_recover.o +MPIBIN = speed_test.mpi .PHONY: clean all all: $(BIN) $(MPIBIN) - +# the rabit library allreduce_base.o: ../src/allreduce_base.cc ../src/*.h engine.o: ../src/engine.cc ../src/*.h -allreduce_robust.o: ../src/allreduce_robust.cc ../src/*.h engine_mpi.o: ../src/engine_mpi.cc -test_allreduce.o: test_allreduce.cpp ../src/*.h +allreduce_robust.o: ../src/allreduce_robust.cc ../src/*.h + +# programs speed_test.o: speed_test.cpp ../src/*.h -test_recover.o: test_recover.cpp ../src/*.h test_model_recover.o: test_model_recover.cpp ../src/*.h test_local_recover.o: test_local_recover.cpp ../src/*.h # we can link against MPI version to get use MPI -test_allreduce: test_allreduce.o $(RABIT_OBJ) -test_allreduce.mpi: test_allreduce.o $(MPIOBJ) speed_test: speed_test.o $(RABIT_OBJ) speed_test.mpi: speed_test.o $(MPIOBJ) -test_recover: test_recover.o $(RABIT_OBJ) test_model_recover: test_model_recover.o $(RABIT_OBJ) test_local_recover: test_local_recover.o $(RABIT_OBJ) @@ -48,4 +45,4 @@ $(MPIOBJ) : $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) clean: - $(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~ + $(RM) $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) *~ ../src/*~ diff --git a/test/README.md b/test/README.md new file mode 100644 index 000000000..fb68112bf --- /dev/null +++ b/test/README.md @@ -0,0 +1,18 @@ +Testcases of 
Rabit +==== +This folder contains internal testcases to test correctness and efficiency of rabit API + +The example running scripts for testcases are given by test.mk +* type ```make -f test.mk testcasename``` to run certain testcase + + +Helper Scripts +==== +* test.mk contains Makefile documentation of all testcases +* keepalive.sh helper bash to restart a program when it dies abnormally + +List of Programs +==== +* speed_test: test the running speed of rabit API +* test_local_recover: test recovery of local state when error happens +* test_model_recover: test recovery of global state when error happens diff --git a/src/config.h b/test/config.h similarity index 100% rename from src/config.h rename to test/config.h diff --git a/test/keepalive.sh b/test/keepalive.sh index ddfc5d618..854de0c33 100755 --- a/test/keepalive.sh +++ b/test/keepalive.sh @@ -6,9 +6,9 @@ then exit -1 fi nrep=0 -echo ./$@ task_id=$OMPI_COMM_WORLD_RANK -until ./$@ task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do +echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK +until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do sleep 1 nrep=$((nrep+1)) - echo ./$@ job_id=$OMPI_COMM_WORLD_RANK repeat=$nrep + echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep done diff --git a/src/mock.h b/test/mock.h similarity index 99% rename from src/mock.h rename to test/mock.h index e5a4c283a..a5ac39c83 100644 --- a/src/mock.h +++ b/test/mock.h @@ -11,6 +11,9 @@ #include #include +struct MockException { +}; + namespace rabit { /*! 
\brief namespace of mock */ namespace test { diff --git a/test/speed_runner.py b/test/speed_runner.py index 7331c9075..1644bfe99 100644 --- a/test/speed_runner.py +++ b/test/speed_runner.py @@ -31,4 +31,4 @@ def main(): sys.stderr.flush() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/test/speed_test.cpp b/test/speed_test.cpp index c4410a562..8f7fc68bf 100644 --- a/test/speed_test.cpp +++ b/test/speed_test.cpp @@ -1,3 +1,4 @@ +// This program is used to test the speed of rabit API #include #include #include @@ -12,8 +13,6 @@ double max_tdiff, sum_tdiff, bcast_tdiff, tot_tdiff; inline void TestMax(size_t n) { int rank = rabit::GetRank(); - //int nproc = rabit::GetWorldSize(); - std::vector ndata(n); for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % 111; @@ -25,7 +24,6 @@ inline void TestMax(size_t n) { inline void TestSum(size_t n) { int rank = rabit::GetRank(); - //int nproc = rabit::GetWorldSize(); const int z = 131; std::vector ndata(n); for (size_t i = 0; i < ndata.size(); ++i) { diff --git a/test/test.mk b/test/test.mk new file mode 100644 index 000000000..a70fcf050 --- /dev/null +++ b/test/test.mk @@ -0,0 +1,20 @@ +ifndef $(nslave) + nslave=2 +endif +ifndef $(ndata) + ndata=10 +endif + +# this is a makefile used to show testcases of rabit +.PHONY: model_recover local_recover speed + + +local_recover: + ../submit_mpi.py $(nslave) local test_local_recover $(ndata) rabit_local_replica=1 + +local_recover_10_10k: + ../submit_mpi.py 10 local test_local_recover 10000 rabit_local_replica=1 + +# this experiment test recovery with actually process exit, use keepalive to keep program alive +model_recover_10_10k: + ../submit_mpi.py 10 local keepalive.sh test_model_recover 10000 diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp deleted file mode 100644 index 707b1a22a..000000000 --- a/test/test_allreduce.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include -#include -#include -#include -#include - 
-using namespace rabit; - -inline void TestMax(test::Mock &mock, size_t n) { - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % 111; - } - mock.Allreduce(&ndata[0], ndata.size()); - for (size_t i = 0; i < ndata.size(); ++i) { - float rmax = (i * 1) % 111; - for (int r = 0; r < nproc; ++r) { - rmax = std::max(rmax, (float)((i * (r+1)) % 111)); - } - utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank); - } -} - -inline void TestSum(test::Mock &mock, size_t n) { - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - const int z = 131; - - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % z; - } - mock.Allreduce(&ndata[0], ndata.size()); - for (size_t i = 0; i < ndata.size(); ++i) { - float rsum = 0.0f; - for (int r = 0; r < nproc; ++r) { - rsum += (float)((i * (r+1)) % z); - } - utils::Check(fabsf(rsum - ndata[i]) < 1e-5 , - "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]); - } -} - -inline void TestBcast(test::Mock &mock, size_t n, int root) { - int rank = rabit::GetRank(); - std::string s; s.resize(n); - for (size_t i = 0; i < n; ++i) { - s[i] = char(i % 126 + 1); - } - std::string res; - if (root == rank) { - res = s; - mock.Broadcast(&res, root); - } else { - mock.Broadcast(&res, root); - } - utils::Check(res == s, "[%d] TestBcast fail", rank); -} - -int main(int argc, char *argv[]) { - if (argc < 3) { - printf("Usage: \n"); - return 0; - } - int n = atoi(argv[1]); - rabit::Init(argc, argv); - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - std::string name = rabit::GetProcessorName(); - - test::Mock mock(rank, argv[2], argv[3]); - - utils::LogPrintf("[%d] start at %s\n", rank, name.c_str()); - TestMax(mock, n); - utils::LogPrintf("[%d] !!!TestMax pass\n", rank); - TestSum(mock, n); - utils::LogPrintf("[%d] !!!TestSum 
pass\n", rank); - int step = std::max(nproc / 3, 1); - for (int i = 0; i < nproc; i += step) { - TestBcast(mock, n, i); - } - utils::LogPrintf("[%d] !!!TestBcast pass\n", rank); - rabit::Finalize(); - printf("[%d] all check pass\n", rank); - return 0; -} diff --git a/test/test_local_recover.cpp b/test/test_local_recover.cpp index 106e04ef9..2d2c8234c 100644 --- a/test/test_local_recover.cpp +++ b/test/test_local_recover.cpp @@ -5,12 +5,28 @@ #include #include #include -#include +#include "./mock.h" using namespace rabit; - -struct MockException { -}; +namespace rabit { +namespace test { +inline void CallBegin(const char *fun, int ntrial, int iter) { + int rank = rabit::GetRank(); + if (!strcmp(fun, "Allreduce::Sum")) { + if (ntrial == iter && rank == 0) throw MockException(); + } + if (!strcmp(fun, "Allreduce::Max")) { + if (ntrial == iter && rank == 3) throw MockException(); + } +} +inline void CallEnd(const char *fun, int ntrial, int iter) { + int rank = rabit::GetRank(); + if (!strcmp(fun, "Allreduce::Bcast")) { + if (ntrial == iter && rand() % 10 == rank) throw MockException(); + } +} +} +} // dummy model class Model : public rabit::utils::ISerializable { @@ -31,7 +47,7 @@ class Model : public rabit::utils::ISerializable { } }; -inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, int iter) { +inline void TestMax(Model *model, Model *local, int ntrial, int iter) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); const int z = iter + 111; @@ -39,11 +55,11 @@ inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, in std::vector ndata(model->data.size()); for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + local->data[i]; - } - mock.Allreduce(&ndata[0], ndata.size()); - if (ntrial == iter && rank == 1) { - throw MockException(); - } + } + test::CallBegin("Allreduce::Max", ntrial, iter); + rabit::Allreduce(&ndata[0], ndata.size()); + test::CallEnd("Allreduce::Max", ntrial, iter); 
+ for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; for (int r = 0; r < nproc; ++r) { @@ -58,7 +74,7 @@ inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, in } } -inline void TestSum(test::Mock &mock, Model *model, Model *local, int ntrial, int iter) { +inline void TestSum(Model *model, Model *local, int ntrial, int iter) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); const int z = 131 + iter; @@ -67,11 +83,10 @@ inline void TestSum(test::Mock &mock, Model *model, Model *local, int ntrial, in for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + local->data[i]; } - if (ntrial == iter && rank == 0) { - throw MockException(); - } - mock.Allreduce(&ndata[0], ndata.size()); - + test::CallBegin("Allreduce::Sum", ntrial, iter); + Allreduce(&ndata[0], ndata.size()); + test::CallEnd("Allreduce::Sum", ntrial, iter); + for (size_t i = 0; i < ndata.size(); ++i) { float rsum = 0.0f; for (int r = 0; r < nproc; ++r) { @@ -86,7 +101,7 @@ inline void TestSum(test::Mock &mock, Model *model, Model *local, int ntrial, in } } -inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { +inline void TestBcast(size_t n, int root, int ntrial, int iter) { int rank = rabit::GetRank(); std::string s; s.resize(n); for (size_t i = 0; i < n; ++i) { @@ -95,16 +110,20 @@ inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { std::string res; if (root == rank) { res = s; - mock.Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); + rabit::Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); } else { - mock.Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); + rabit::Broadcast(&res, root); + test::CallEnd("Broadcast", ntrial, iter); } utils::Check(res == s, "[%d] TestBcast fail", rank); } int main(int argc, char *argv[]) { if (argc < 3) { - printf("Usage: \n"); + printf("Usage: \n"); return 0; } int n = 
atoi(argv[1]); @@ -112,7 +131,6 @@ int main(int argc, char *argv[]) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); std::string name = rabit::GetProcessorName(); - test::Mock mock(rank, argv[2], argv[3]); Model model, local; srand(0); int ntrial = 0; @@ -131,14 +149,14 @@ int main(int argc, char *argv[]) { utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } for (int r = iter; r < 3; ++r) { - TestMax(mock, &model, &local, ntrial, r); + TestMax(&model, &local, ntrial, r); utils::LogPrintf("[%d] !!!TestMax pass, iter=%d\n", rank, r); int step = std::max(nproc / 3, 1); for (int i = 0; i < nproc; i += step) { - //TestBcast(mock, n, i, ntrial); + TestBcast(n, i, ntrial, r); } - //utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); - TestSum(mock, &model, &local, ntrial, r); + utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); + TestSum(&model, &local, ntrial, r); utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); rabit::CheckPoint(&model, &local); utils::LogPrintf("[%d] !!!CheckPont pass, iter=%d\n", rank, r); diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index 17432c06e..6feb56dde 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -5,12 +5,28 @@ #include #include #include -#include +#include "./mock.h" using namespace rabit; - -struct MockException { -}; +namespace rabit { +namespace test { +inline void CallBegin(const char *fun, int ntrial, int iter) { + int rank = rabit::GetRank(); + if (!strcmp(fun, "Allreduce::Sum")) { + if (ntrial == iter && rank == 0) exit(-1); + } + if (!strcmp(fun, "Allreduce::Max")) { + if (ntrial == iter && rank == 3) exit(-1); + } +} +inline void CallEnd(const char *fun, int ntrial, int iter) { + int rank = rabit::GetRank(); + if (!strcmp(fun, "Allreduce::Bcast")) { + if (ntrial == iter && rand() % 10 == rank) exit(-1); + } +} +} +} // dummy model class Model : public rabit::utils::ISerializable { @@ -31,7 
+47,7 @@ class Model : public rabit::utils::ISerializable { } }; -inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { +inline void TestMax(Model *model, int ntrial, int iter) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); const int z = iter + 111; @@ -40,10 +56,10 @@ inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + model->data[i]; } - mock.Allreduce(&ndata[0], ndata.size()); - if (ntrial == 0 && rank == 3) { - exit(-1); - } + test::CallBegin("Allreduce::Max", ntrial, iter); + rabit::Allreduce(&ndata[0], ndata.size()); + test::CallEnd("Allreduce::Max", ntrial, iter); + for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; for (int r = 0; r < nproc; ++r) { @@ -54,7 +70,7 @@ inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { model->data = ndata; } -inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { +inline void TestSum(Model *model, int ntrial, int iter) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); const int z = 131 + iter; @@ -63,11 +79,9 @@ inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + model->data[i]; } - if (iter == 0 && ntrial==0 && rank == 0) { - throw MockException(); - } - - mock.Allreduce(&ndata[0], ndata.size()); + test::CallBegin("Allreduce::Sum", ntrial, iter); + Allreduce(&ndata[0], ndata.size()); + test::CallEnd("Allreduce::Sum", ntrial, iter); for (size_t i = 0; i < ndata.size(); ++i) { float rsum = model->data[i] * nproc; @@ -80,7 +94,7 @@ inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { model->data = ndata; } -inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { +inline void TestBcast(size_t n, int root, int ntrial, int iter) { int rank = rabit::GetRank(); 
std::string s; s.resize(n); for (size_t i = 0; i < n; ++i) { @@ -89,9 +103,13 @@ inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { std::string res; if (root == rank) { res = s; - mock.Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); + rabit::Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); } else { - mock.Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); + rabit::Broadcast(&res, root); + test::CallEnd("Broadcast", ntrial, iter); } utils::Check(res == s, "[%d] TestBcast fail", rank); } @@ -106,7 +124,6 @@ int main(int argc, char *argv[]) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); std::string name = rabit::GetProcessorName(); - test::Mock mock(rank, argv[2], argv[3]); Model model; srand(0); int ntrial = 0; @@ -124,14 +141,14 @@ int main(int argc, char *argv[]) { utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } for (int r = iter; r < 3; ++r) { - TestMax(mock, &model, ntrial, r); + TestMax(&model, ntrial, r); utils::LogPrintf("[%d] !!!TestMax pass, iter=%d\n", rank, r); - //int step = std::max(nproc / 3, 1); - //for (int i = 0; i < nproc; i += step) { - //TestBcast(mock, n, i, ntrial); - //} - //utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); - TestSum(mock, &model, ntrial, r); + int step = std::max(nproc / 3, 1); + for (int i = 0; i < nproc; i += step) { + TestBcast(n, i, ntrial, r); + } + utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); + TestSum(&model, ntrial, r); utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); rabit::CheckPoint(&model); utils::LogPrintf("[%d] !!!CheckPont pass, iter=%d\n", rank, r); diff --git a/test/test_recover.cpp b/test/test_recover.cpp deleted file mode 100644 index 92aa60918..000000000 --- a/test/test_recover.cpp +++ /dev/null @@ -1,125 +0,0 @@ -#include -#include -#include -#include -#include -#include - -using namespace rabit; - -struct MockException { -}; - 
-inline void TestMax(test::Mock &mock, size_t n, int ntrial) { - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % 111; - } - mock.Allreduce(&ndata[0], ndata.size()); - if (ntrial == 0 && rank == 15) throw MockException(); - for (size_t i = 0; i < ndata.size(); ++i) { - float rmax = (i * 1) % 111; - for (int r = 0; r < nproc; ++r) { - rmax = std::max(rmax, (float)((i * (r+1)) % 111)); - } - utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank); - } -} - -inline void TestSum(test::Mock &mock, size_t n, int ntrial) { - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - const int z = 131; - - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % z; - } - mock.Allreduce(&ndata[0], ndata.size()); - - if (ntrial == 0 && rank == 0) throw MockException(); - - for (size_t i = 0; i < ndata.size(); ++i) { - float rsum = 0.0f; - for (int r = 0; r < nproc; ++r) { - rsum += (float)((i * (r+1)) % z); - } - utils::Check(fabsf(rsum - ndata[i]) < 1e-5 , - "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]); - } -} - -inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { - int rank = rabit::GetRank(); - std::string s; s.resize(n); - for (size_t i = 0; i < n; ++i) { - s[i] = char(i % 126 + 1); - } - std::string res; - if (root == rank) { - res = s; - mock.Broadcast(&res, root); - } else { - mock.Broadcast(&res, root); - } - utils::Check(res == s, "[%d] TestBcast fail", rank); -} -// dummy model -class Model : public rabit::utils::ISerializable { - public: - // load from stream - virtual void Load(rabit::utils::IStream &fi) { - // do nothing - } - /*! 
\brief save the model to the stream */ - virtual void Save(rabit::utils::IStream &fo) const { - // do nothing - } - virtual void InitModel(void) { - // do nothing - } -}; - -int main(int argc, char *argv[]) { - if (argc < 3) { - printf("Usage: \n"); - return 0; - } - int n = atoi(argv[1]); - rabit::Init(argc, argv); - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - std::string name = rabit::GetProcessorName(); - test::Mock mock(rank, argv[2], argv[3]); - Model model; - srand(0); - int ntrial = 0; - while (true) { - try { - if (rabit::LoadCheckPoint(&model) == 0) { - model.InitModel(); - } - utils::LogPrintf("[%d/%d] start at %s\n", rank, ntrial, name.c_str()); - TestMax(mock, n, ntrial); - utils::LogPrintf("[%d/%d] !!!TestMax pass\n", rank, ntrial); - TestSum(mock, n, ntrial); - utils::LogPrintf("[%d/%d] !!!TestSum pass\n", rank, ntrial); - int step = std::max(nproc / 3, 1); - for (int i = 0; i < nproc; i += step) { - TestBcast(mock, n, i, ntrial); - } - utils::LogPrintf("[%d] !!!TestBcast pass\n", rank); - // reach here - break; - } catch (MockException &e) { - rabit::engine::GetEngine()->InitAfterException(); - ++ntrial; - } - } - rabit::Finalize(); - printf("[%d] all check pass\n", rank); - return 0; -} diff --git a/test/testcase0.conf b/test/testcase0.conf deleted file mode 100644 index 4c324d282..000000000 --- a/test/testcase0.conf +++ /dev/null @@ -1 +0,0 @@ -# Test Case 0 -> nothing fails \ No newline at end of file diff --git a/test/testcase1.conf b/test/testcase1.conf deleted file mode 100644 index cc9bd662c..000000000 --- a/test/testcase1.conf +++ /dev/null @@ -1,9 +0,0 @@ -# Test Case example config -# You configure which methods should fail -# Format _ = -# can be one of the following = allreduce, broadcast, loadcheckpoint, checkpoint - -1_0 = allreduce -1_1 = broadcast - -2_2 = allreduce From 1754fdbf4ea8e8db4ce5e2b7f06cb0ce5f7315f2 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 19 Dec 2014 02:00:43 -0800 Subject: [PATCH 119/531] 
enable support for lambda preprocessing function, and c++11 --- src/allreduce_base.h | 9 ++++++++- src/allreduce_robust.cc | 9 ++++++++- src/allreduce_robust.h | 12 ++++++++--- src/engine.cc | 6 ++++-- src/engine.h | 26 ++++++++++++++++++++---- src/engine_mpi.cc | 9 +++++++-- src/rabit-inl.h | 19 ++++++++++++++++-- src/rabit.h | 40 ++++++++++++++++++++++++++++++++++++- test/Makefile | 2 +- test/test_local_recover.cpp | 15 ++++++++------ 10 files changed, 124 insertions(+), 23 deletions(-) diff --git a/src/allreduce_base.h b/src/allreduce_base.h index 4ef4a044e..bc7cc26c9 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -64,11 +64,18 @@ class AllreduceBase : public IEngine { * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function + * \param prepare_func Lazy preprocessing function, lazy prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. 
+ * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \param prepare_arg argument used to passed into the lazy preprocessing function */ virtual void Allreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, - ReduceFunction reducer) { + ReduceFunction reducer, + PreprocFunction prepare_fun = NULL, + void *prepare_arg = NULL) { + if (prepare_fun != NULL) prepare_fun(prepare_arg); utils::Assert(TryAllreduce(sendrecvbuf_, type_nbytes, count, reducer) == kSuccess, "Allreduce failed"); } diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 828f57e60..7d339cf84 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -57,17 +57,24 @@ void AllreduceRobust::SetParam(const char *name, const char *val) { * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function + * \param prepare_func Lazy preprocessing function, lazy prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. 
+ * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \param prepare_arg argument used to passed into the lazy preprocessing function */ void AllreduceRobust::Allreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, - ReduceFunction reducer) { + ReduceFunction reducer, + PreprocFunction prepare_fun, + void *prepare_arg) { bool recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter); // now we are free to remove the last result, if any if (resbuf.LastSeqNo() != -1 && (resbuf.LastSeqNo() % result_buffer_round != rank % result_buffer_round)) { resbuf.DropLast(); } + if (!recovered && prepare_fun != NULL) prepare_fun(prepare_arg); void *temp = resbuf.AllocTemp(type_nbytes, count); while (true) { if (recovered) { diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index 92c682b12..7888a66f1 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -35,11 +35,17 @@ class AllreduceRobust : public AllreduceBase { * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function - */ + * \param prepare_func Lazy preprocessing function, lazy prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. + * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \param prepare_arg argument used to passed into the lazy preprocessing function + */ virtual void Allreduce(void *sendrecvbuf_, size_t type_nbytes, - size_t count, - ReduceFunction reducer); + size_t count, + ReduceFunction reducer, + PreprocFunction prepare_fun = NULL, + void *prepare_arg = NULL); /*! 
* \brief broadcast data from root to all nodes * \param sendrecvbuf_ buffer for both sending and recving data diff --git a/src/engine.cc b/src/engine.cc index 0512ac503..cc6a48745 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -43,8 +43,10 @@ void Allreduce_(void *sendrecvbuf, size_t count, IEngine::ReduceFunction red, mpi::DataType dtype, - mpi::OpType op) { - GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red); + mpi::OpType op, + IEngine::PreprocFunction prepare_fun, + void *prepare_arg) { + GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun, prepare_arg); } } // namespace engine } // namespace rabit diff --git a/src/engine.h b/src/engine.h index e393e94db..977b0d6ff 100644 --- a/src/engine.h +++ b/src/engine.h @@ -20,6 +20,12 @@ namespace engine { class IEngine { public: /*! + * \brief Preprocessing function, that is called before AllReduce, + * used to prepare the data used by AllReduce + * \param arg additional possible argument used to invoke the preprocessor + */ + typedef void (PreprocFunction) (void *arg); + /*! * \brief reduce function, the same form of MPI reduce function is used, * to be compatible with MPI interface * In all the functions, the memory is ensured to aligned to 64-bit @@ -34,17 +40,23 @@ class IEngine { void *dst, int count, const MPI::Datatype &dtype); /*! - * \brief perform in-place allreduce, on sendrecvbuf + * \brief perform in-place allreduce, on sendrecvbuf * this function is NOT thread-safe * \param sendrecvbuf_ buffer for both sending and recving data * \param type_nbytes the unit number of bytes the type have * \param count number of elements to be reduced * \param reducer reduce function + * \param prepare_func Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. 
+ * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \param prepare_arg argument used to passed into the lazy preprocessing function */ virtual void Allreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, - ReduceFunction reducer) = 0; + ReduceFunction reducer, + PreprocFunction prepare_fun = NULL, + void *prepare_arg = NULL) = 0; /*! * \brief broadcast data from root to all nodes * \param sendrecvbuf_ buffer for both sending and recving data @@ -145,13 +157,19 @@ enum DataType { * \param reducer reduce function * \param dtype the data type * \param op the reduce operator type + * \param prepare_func Lazy preprocessing function, lazy prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. + * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \param prepare_arg argument used to passed into the lazy preprocessing function * */ void Allreduce_(void *sendrecvbuf, size_t type_nbytes, size_t count, - IEngine::ReduceFunction red, + IEngine::ReduceFunction red, mpi::DataType dtype, - mpi::OpType op); + mpi::OpType op, + IEngine::PreprocFunction prepare_fun = NULL, + void *prepare_arg = NULL); } // namespace engine } // namespace rabit #endif // RABIT_ENGINE_H diff --git a/src/engine_mpi.cc b/src/engine_mpi.cc index f32dba854..9e5972e1a 100644 --- a/src/engine_mpi.cc +++ b/src/engine_mpi.cc @@ -23,7 +23,9 @@ class MPIEngine : public IEngine { virtual void Allreduce(void *sendrecvbuf_, size_t type_nbytes, size_t count, - ReduceFunction reducer) { + ReduceFunction reducer, + PreprocFunction prepare_fun, + void *prepare_arg) { utils::Error("MPIEngine:: Allreduce is not supported, use Allreduce_ instead"); } virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) { @@ -110,7 +112,10 @@ void Allreduce_(void *sendrecvbuf, size_t count, IEngine::ReduceFunction red, mpi::DataType dtype, - mpi::OpType op) 
{ + mpi::OpType op, + IEngine::PreprocFunction prepare_fun, + void *prepare_arg) { + if (prepare_fun != NULL) prepare_fun(prepare_arg); MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, GetType(dtype), GetOp(op)); } } // namespace engine diff --git a/src/rabit-inl.h b/src/rabit-inl.h index 95a2eb8fd..8d379f920 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -119,10 +119,25 @@ inline void Broadcast(std::string *sendrecv_data, int root) { // perform inplace Allreduce template -inline void Allreduce(DType *sendrecvbuf, size_t count) { +inline void Allreduce(DType *sendrecvbuf, size_t count, + void (*prepare_fun)(void *arg), + void *prepare_arg) { engine::Allreduce_(sendrecvbuf, sizeof(DType), count, op::Reducer, - engine::mpi::GetType(), OP::kType); + engine::mpi::GetType(), OP::kType, prepare_fun, prepare_arg); } + +// C++11 support for lambda prepare function +#if __cplusplus >= 201103L +inline void InvokeLambda_(void *fun) { + (*static_cast*>(fun))(); +} +template +inline void Allreduce(DType *sendrecvbuf, size_t count, std::function prepare_fun) { + engine::Allreduce_(sendrecvbuf, sizeof(DType), count, op::Reducer, + engine::mpi::GetType(), OP::kType, InvokeLambda_, &prepare_fun); +} +#endif // C++11 + // load latest check point inline int LoadCheckPoint(utils::ISerializable *global_model, utils::ISerializable *local_model) { diff --git a/src/rabit.h b/src/rabit.h index c7cde6b4b..ac17faec6 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -10,6 +10,11 @@ */ #include #include +// optionally support of lambda function in C++11, if available +#if __cplusplus >= 201103L +#include +#endif // C++11 +// rabit headers #include "./io.h" #include "./engine.h" @@ -78,11 +83,44 @@ inline void Broadcast(std::string *sendrecv_data, int root); * ... 
* \param sendrecvbuf buffer for both sending and recving data * \param count number of elements to be reduced + * \param prepare_func Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. + * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \param prepare_arg argument used to passed into the lazy preprocessing function * \tparam OP see namespace op, reduce operator * \tparam DType type of data */ template -inline void Allreduce(DType *sendrecvbuf, size_t count); +inline void Allreduce(DType *sendrecvbuf, size_t count, + void (*prepare_fun)(void *arg) = NULL, + void *prepare_arg = NULL); + +// C++11 support for lambda prepare function +#if __cplusplus >= 201103L +/*! + * \brief perform in-place allreduce, on sendrecvbuf + * with a prepare function specified by lambda function + * Example Usage: the following code gives sum of the result + * vector data(10); + * ... + * Allreduce(&data[0], data.size(), [&]() { + * for (int i = 0; i < 10; ++i) { + * data[i] = i; + * } + * }); + * ... + * \param sendrecvbuf buffer for both sending and recving data + * \param count number of elements to be reduced + * \param prepare_func Lazy lambda preprocessing function, prepare_fun() will be invoked + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. + * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \tparam OP see namespace op, reduce operator + * \tparam DType type of data + */ +template +inline void Allreduce(DType *sendrecvbuf, size_t count, std::function prepare_fun); +#endif // C++11 + /*! 
* \brief load latest check point * \param global_model pointer to the globally shared model/state diff --git a/test/Makefile b/test/Makefile index 2f3b81251..18d876b2e 100644 --- a/test/Makefile +++ b/test/Makefile @@ -2,7 +2,7 @@ export CC = gcc export CXX = g++ export MPICXX = mpicxx export LDFLAGS= -pthread -lm -lrt -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src -std=c++11 # specify tensor path BIN = speed_test test_model_recover test_local_recover diff --git a/test/test_local_recover.cpp b/test/test_local_recover.cpp index 2d2c8234c..d98c6ae48 100644 --- a/test/test_local_recover.cpp +++ b/test/test_local_recover.cpp @@ -50,14 +50,17 @@ class Model : public rabit::utils::ISerializable { inline void TestMax(Model *model, Model *local, int ntrial, int iter) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); - const int z = iter + 111; - + const int z = iter + 111; std::vector ndata(model->data.size()); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % z + local->data[i]; - } + test::CallBegin("Allreduce::Max", ntrial, iter); - rabit::Allreduce(&ndata[0], ndata.size()); + rabit::Allreduce(&ndata[0], ndata.size(), + [&]() { + // use lambda expression to prepare the data + for (size_t i = 0; i < ndata.size(); ++i) { + ndata[i] = (i * (rank+1)) % z + local->data[i]; + } + }); test::CallEnd("Allreduce::Max", ntrial, iter); for (size_t i = 0; i < ndata.size(); ++i) { From 69d7f71ae8cea28dc44a932d8856a1301bea623e Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 19 Dec 2014 02:12:53 -0800 Subject: [PATCH 120/531] change kmeans to using lambda --- toolkit/Makefile | 7 ++----- toolkit/kmeans.cpp | 29 ++++++++++++++++------------- toolkit/kmeans.sh | 8 -------- 3 files changed, 18 insertions(+), 26 deletions(-) delete mode 100755 toolkit/kmeans.sh diff --git a/toolkit/Makefile b/toolkit/Makefile index 4d96e6bab..429264ad3 100644 --- 
a/toolkit/Makefile +++ b/toolkit/Makefile @@ -2,15 +2,12 @@ export CC = gcc export CXX = g++ export MPICXX = mpicxx export LDFLAGS= -pthread -lm -L../lib -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src -std=c++11 # specify tensor path BIN = kmeans # objectives that makes up rabit library -RABIT_OBJ = allreduce_base.o allreduce_robust.o engine.o -MPIOBJ = engine_mpi.o - -OBJ = $(RABIT_OBJ) kmeans.o +OBJ = kmeans.o MPIBIN = kmeans.mpi .PHONY: clean all lib diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index e6dffd500..c08e50c23 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -114,20 +114,23 @@ int main(int argc, char *argv[]) { // matrix to store the result Matrix temp; for (int r = iter; r < max_iter; ++r) { - temp.Init(num_cluster, num_feat + 1, 0.0f); - const size_t ndata = data.NumRow(); - for (size_t i = 0; i < ndata; ++i) { - SparseMat::Vector v = data[i]; - size_t k = GetCluster(model.centroids, v); - // temp[k] += v - for (size_t j = 0; j < v.length; ++j) { - temp[k][v[j].findex] += v[j].fvalue; - } - // use last column to record counts - temp[k][num_feat] += 1.0f; - } + temp.Init(num_cluster, num_feat + 1, 0.0f); // call allreduce - rabit::Allreduce(&temp.data[0], temp.data.size()); + rabit::Allreduce(&temp.data[0], temp.data.size(), [&]() { + // lambda function used to calculate the data if necessary + // this function may not be called when the result can be directly recovered + const size_t ndata = data.NumRow(); + for (size_t i = 0; i < ndata; ++i) { + SparseMat::Vector v = data[i]; + size_t k = GetCluster(model.centroids, v); + // temp[k] += v + for (size_t j = 0; j < v.length; ++j) { + temp[k][v[j].findex] += v[j].fvalue; + } + // use last column to record counts + temp[k][num_feat] += 1.0f; + } + }); // set number for (int k = 0; k < num_cluster; ++k) { float cnt = temp[k][num_feat]; diff --git a/toolkit/kmeans.sh b/toolkit/kmeans.sh deleted 
file mode 100755 index 53235a318..000000000 --- a/toolkit/kmeans.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -if [ "$#" -lt 4 ]; -then - echo "Usage " - exit -1 -fi - -../submit_job.py $1 kmeans "${@:2}" From 9f42b78a18201ed3b3521f619329c27f662ff04c Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 19 Dec 2014 04:20:45 -0800 Subject: [PATCH 121/531] improve tracker script --- src/allreduce_base.cc | 22 +++++++++++--- submit_hadoop.py | 31 ------------------- test/test.mk | 6 ++-- tracker/rabit_hadoop.py | 44 +++++++++++++++++++++++++++ submit_mpi.py => tracker/rabit_mpi.py | 0 {src => tracker}/rabit_tracker.py | 33 ++++++++++++++------ 6 files changed, 88 insertions(+), 48 deletions(-) delete mode 100755 submit_hadoop.py create mode 100755 tracker/rabit_hadoop.py rename submit_mpi.py => tracker/rabit_mpi.py (100%) rename {src => tracker}/rabit_tracker.py (86%) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 99e12561c..d2ab14daa 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -22,7 +22,7 @@ AllreduceBase::AllreduceBase(void) { slave_port = 9010; nport_trial = 1000; rank = -1; - world_size = 1; + world_size = -1; hadoop_mode = 0; version_number = 0; task_id = "NULL"; @@ -31,8 +31,8 @@ AllreduceBase::AllreduceBase(void) { // initialization function void AllreduceBase::Init(void) { - { - // handling for hadoop + // setup from enviroment variables + {// handling for hadoop const char *task_id = getenv("mapred_task_id"); if (hadoop_mode != 0) { utils::Check(task_id != NULL, "hadoop_mode is set but cannot find mapred_task_id"); @@ -41,7 +41,16 @@ void AllreduceBase::Init(void) { this->SetParam("rabit_task_id", task_id); this->SetParam("rabit_hadoop_mode", "1"); } - } + // handling for hadoop + const char *num_task = getenv("mapred_map_tasks"); + if (hadoop_mode != 0) { + utils::Check(num_task != NULL, "hadoop_mode is set but cannot find mapred_map_tasks"); + } + if (num_task != NULL) { + this->SetParam("rabit_world_size", num_task); + } 
+ } + //--------------------- // start socket utils::Socket::Startup(); utils::Assert(all_links.size() == 0, "can only call Init once"); @@ -70,6 +79,7 @@ void AllreduceBase::Shutdown(void) { utils::Check(magic == kMagic, "sync::Invalid tracker message, init failure"); utils::Assert(tracker.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); + utils::Assert(tracker.SendAll(&world_size, sizeof(world_size)) == sizeof(world_size), "ReConnectLink failure 3"); tracker.SendStr(task_id); tracker.SendStr(std::string("shutdown")); tracker.Close(); @@ -84,6 +94,7 @@ void AllreduceBase::SetParam(const char *name, const char *val) { if (!strcmp(name, "rabit_tracker_uri")) tracker_uri = val; if (!strcmp(name, "rabit_tracker_port")) tracker_port = atoi(val); if (!strcmp(name, "rabit_task_id")) task_id = val; + if (!strcmp(name, "rabit_world_size")) world_size = atoi(val); if (!strcmp(name, "rabit_hadoop_mode")) hadoop_mode = atoi(val); if (!strcmp(name, "rabit_reduce_buffer")) { char unit; @@ -108,7 +119,7 @@ void AllreduceBase::SetParam(const char *name, const char *val) { void AllreduceBase::ReConnectLinks(const char *cmd) { // single node mode if (tracker_uri == "NULL") { - rank = 0; return; + rank = 0; world_size = 1; return; } int magic = kMagic; // get information from tracker @@ -121,6 +132,7 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { utils::Assert(tracker.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); utils::Check(magic == kMagic, "sync::Invalid tracker message, init failure"); utils::Assert(tracker.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); + utils::Assert(tracker.SendAll(&world_size, sizeof(world_size)) == sizeof(world_size), "ReConnectLink failure 3"); tracker.SendStr(task_id); tracker.SendStr(std::string(cmd)); // the rank of previous link, next link in ring diff --git a/submit_hadoop.py b/submit_hadoop.py deleted file mode 100755 index 3852b9f1d..000000000 --- 
a/submit_hadoop.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/python -""" -This is an example job submit script for hadoop streaming -""" -import argparse -import sys -import os -import time -import subprocess -sys.path.append('./src/') -import rabit_tracker as tracker - -parser = argparse.ArgumentParser(description='Hadoop Streaming submission script') -parser.add_argument('-s', '--nslaves', required=True, type=int) -parser.add_argument('-hb', '--hadoop_binary', required=True) -parser.add_argument('-hs', '--hadoop_streaming_jar', required=True) -parser.add_argument('-i', '--input', required=True) -parser.add_argument('-o', '--output', required=True) -parser.add_argument('-m', '--mapper', required=True) -parser.add_argument('-k', '--nclusters', required=True, type=int) -parser.add_argument('-itr', '--iterations', required=True, type=int) -args = parser.parse_args() - -def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer \"/bin/cat\" -file %s' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper) - print cmd - subprocess.check_call(cmd, shell = True) - -start = time.time() -tracker.submit(args.nslaves, [], fun_submit= hadoop_streaming) -print 'All run took %s' % (time.time() - start) diff --git a/test/test.mk b/test/test.mk index a70fcf050..a7b6ceb51 100644 --- a/test/test.mk +++ b/test/test.mk @@ -10,11 +10,11 @@ endif local_recover: - ../submit_mpi.py $(nslave) local test_local_recover $(ndata) rabit_local_replica=1 + ../tracker/rabit_mpi.py $(nslave) local test_local_recover $(ndata) rabit_local_replica=1 local_recover_10_10k: - ../submit_mpi.py 10 local test_local_recover 10000 rabit_local_replica=1 + ../tracker/rabit_mpi.py 10 local test_local_recover 10000 rabit_local_replica=1 # this experiment test recovery with actually process exit, use keepalive to keep program alive 
model_recover_10_10k: - ../submit_mpi.py 10 local keepalive.sh test_model_recover 10000 + ../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py new file mode 100755 index 000000000..4a2cdb718 --- /dev/null +++ b/tracker/rabit_hadoop.py @@ -0,0 +1,44 @@ +#!/usr/bin/python +""" +This is a script to submit rabit job using hadoop streaming +submit the rabit process as mappers of MapReduce +""" +import argparse +import sys +import os +import time +import subprocess +import rabit_tracker as tracker + +#!!! you can directly set hadoop binary path and hadoop streaming path here +hadoop_binary = 'hadoop' +hadoop_streaming_jar = None + +parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using hadoop streaming') +parser.add_argument('-s', '--nslaves', required=True, type=int, + help = "number of slaves proccess to be launched") +if hadoop_binary == None: + parser.add_argument('-hb', '--hadoop_binary', required=True, + help="path-to-hadoop binary folder") +if hadoop_streaming_jar == None: + parser.add_argument('-hs', '--hadoop_streaming_jar', required=True, + help='path-to hadoop streamimg jar file') +parser.add_argument('-i', '--input', required=True) +parser.add_argument('-o', '--output', required=True) +parser.add_argument('-m', '--mapper', required=True) +parser.add_argument('-a', '--args', required=True) +args = parser.parse_args() + +if hadoop_binary != None: + args.hadoop_binary = hadoop_binary +if hadoop_streaming_jar != None: + args.hadoop_streaming_jar = hadoop_streaming_jar + +def hadoop_streaming(nslaves, slave_args): + cmd = '%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer \"/bin/cat\" -file %s' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper) + print cmd + subprocess.check_call(cmd, shell = True) + +start = time.time() 
+tracker.submit(args.nslaves, [], fun_submit= hadoop_streaming) +print 'All run took %s' % (time.time() - start) diff --git a/submit_mpi.py b/tracker/rabit_mpi.py similarity index 100% rename from submit_mpi.py rename to tracker/rabit_mpi.py diff --git a/src/rabit_tracker.py b/tracker/rabit_tracker.py similarity index 86% rename from src/rabit_tracker.py rename to tracker/rabit_tracker.py index 025b5938a..8e05b4b5a 100644 --- a/src/rabit_tracker.py +++ b/tracker/rabit_tracker.py @@ -53,6 +53,7 @@ class SlaveEntry: assert magic == kMagic, 'invalid magic number=%d from %s' % (magic, s_addr[0]) slave.sendint(kMagic) self.rank = slave.recvint() + self.world_size = slave.recvint() self.jobid = slave.recvstr() self.cmd = slave.recvstr() @@ -188,32 +189,42 @@ class Tracker: ring_map[rlst[r]] = (rlst[rprev], rlst[rnext]) return ring_map def accept_slaves(self, nslave): - tree_map, parent_map = self.get_tree(nslave) - ring_map = self.get_ring(tree_map, parent_map) # set of nodes that finishs the job shutdown = {} # set of nodes that is waiting for connections wait_conn = {} - # set of nodes that is pending for getting up - todo_nodes = range(nslave) - random.shuffle(todo_nodes) # maps job id to rank job_map = {} # list of workers that is pending to be assigned rank pending = [] + # lazy initialize tree_map + tree_map = None while len(shutdown) != nslave: fd, s_addr = self.sock.accept() - s = SlaveEntry(fd, s_addr) + s = SlaveEntry(fd, s_addr) if s.cmd == 'shutdown': assert s.rank >= 0 and s.rank not in shutdown assert s.rank not in wait_conn shutdown[s.rank] = s + print 'Recieve %s signal from %d' % (s.cmd, s.rank) continue - assert s.cmd == 'start' or s.cmd == 'recover' + assert s.cmd == 'start' or s.cmd == 'recover' + # lazily initialize the slaves + if tree_map == None: + assert s.cmd == 'start' + print s.world_size + if s.world_size > 0: + nslave = s.world_size + tree_map, parent_map = self.get_tree(nslave) + ring_map = self.get_ring(tree_map, parent_map) + # set of 
nodes that is pending for getting up + todo_nodes = range(nslave) + random.shuffle(todo_nodes) + else: + assert s.world_size == -1 or s.world_size == nslave if s.cmd == 'recover': assert s.rank >= 0 - print 'Recieve %s signal from %d' % (s.cmd, s.rank) rank = s.decide_rank(job_map) if rank == -1: assert len(todo_nodes) != 0 @@ -221,6 +232,10 @@ class Tracker: if s.jobid != 'NULL': job_map[s.jobid] = rank s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map) + if s.cmd != 'start': + print 'Recieve %s signal from %d' % (s.cmd, s.rank) + else: + print 'Recieve %s signal from %s assign rank %d' % (s.cmd, s.host, s.rank) if s.wait_accept > 0: wait_conn[rank] = s print 'All nodes finishes job' @@ -234,5 +249,5 @@ def submit(nslave, args, fun_submit = mpi_submit): master = Tracker() submit_thread = Thread(target = fun_submit, args = (nslave, args + master.slave_args())) submit_thread.start() - master.accept_slaves(nslave) + master.accept_slaves(nslaves) submit_thread.join() From 8c35cff02c1e19f9430ac08f08803b0446d49920 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 19 Dec 2014 04:21:16 -0800 Subject: [PATCH 122/531] improve script --- test/.gitignore | 1 + toolkit/README.md | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 toolkit/README.md diff --git a/test/.gitignore b/test/.gitignore index 851969b1e..d3dfbdd4d 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -1,2 +1,3 @@ *.mpi test_* +*_test \ No newline at end of file diff --git a/toolkit/README.md b/toolkit/README.md new file mode 100644 index 000000000..c88d931c9 --- /dev/null +++ b/toolkit/README.md @@ -0,0 +1,3 @@ +Toolkit +==== +This folder contains example toolkit developed using rabit From 6bf282c6c243a728acc1cc8cad7b9ba71a02cb18 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 19 Dec 2014 17:36:42 -0800 Subject: [PATCH 123/531] isolate iserializable --- src/allreduce_base.h | 8 +-- src/allreduce_robust.cc | 10 ++-- src/allreduce_robust.h | 8 +-- src/engine.h | 10 ++-- 
src/engine_mpi.cc | 8 +-- src/io.h | 89 +-------------------------------- src/rabit-inl.h | 10 ++-- src/rabit.h | 15 +++--- src/serializable.h | 99 +++++++++++++++++++++++++++++++++++++ test/mock.h | 8 +-- test/test_local_recover.cpp | 6 +-- test/test_model_recover.cpp | 6 +-- toolkit/kmeans.cpp | 6 +-- toolkit/toolkit_util.h | 1 + 14 files changed, 150 insertions(+), 134 deletions(-) create mode 100644 src/serializable.h diff --git a/src/allreduce_base.h b/src/allreduce_base.h index bc7cc26c9..e313cab88 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -111,8 +111,8 @@ class AllreduceBase : public IEngine { * * \sa CheckPoint, VersionNumber */ - virtual int LoadCheckPoint(utils::ISerializable *global_model, - utils::ISerializable *local_model = NULL) { + virtual int LoadCheckPoint(ISerializable *global_model, + ISerializable *local_model = NULL) { return 0; } /*! @@ -131,8 +131,8 @@ class AllreduceBase : public IEngine { * * \sa LoadCheckPoint, VersionNumber */ - virtual void CheckPoint(const utils::ISerializable *global_model, - const utils::ISerializable *local_model = NULL) { + virtual void CheckPoint(const ISerializable *global_model, + const ISerializable *local_model = NULL) { version_number += 1; } /*! 
diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 7d339cf84..538609c62 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -11,8 +11,8 @@ #include #include "./io.h" #include "./utils.h" -#include "./rabit.h" #include "./allreduce_robust.h" +#include "./rabit.h" namespace rabit { namespace engine { @@ -141,8 +141,8 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) * * \sa CheckPoint, VersionNumber */ -int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, - utils::ISerializable *local_model) { +int AllreduceRobust::LoadCheckPoint(ISerializable *global_model, + ISerializable *local_model) { if (num_local_replica == 0) { utils::Check(local_model == NULL, "need to set num_local_replica larger than 1 to checkpoint local_model"); } @@ -198,8 +198,8 @@ int AllreduceRobust::LoadCheckPoint(utils::ISerializable *global_model, * * \sa LoadCheckPoint, VersionNumber */ -void AllreduceRobust::CheckPoint(const utils::ISerializable *global_model, - const utils::ISerializable *local_model) { +void AllreduceRobust::CheckPoint(const ISerializable *global_model, + const ISerializable *local_model) { if (num_local_replica == 0) { utils::Check(local_model == NULL, "need to set num_local_replica larger than 1 to checkpoint local_model"); } diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index 7888a66f1..fd85e4828 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -75,8 +75,8 @@ class AllreduceRobust : public AllreduceBase { * * \sa CheckPoint, VersionNumber */ - virtual int LoadCheckPoint(utils::ISerializable *global_model, - utils::ISerializable *local_model = NULL); + virtual int LoadCheckPoint(ISerializable *global_model, + ISerializable *local_model = NULL); /*! 
* \brief checkpoint the model, meaning we finished a stage of execution * every time we call check point, there is a version number which will increase by one @@ -93,8 +93,8 @@ class AllreduceRobust : public AllreduceBase { * * \sa LoadCheckPoint, VersionNumber */ - virtual void CheckPoint(const utils::ISerializable *global_model, - const utils::ISerializable *local_model = NULL); + virtual void CheckPoint(const ISerializable *global_model, + const ISerializable *local_model = NULL); /*! * \brief explicitly re-init everything before calling LoadCheckPoint * call this function when IEngine throw an exception out, diff --git a/src/engine.h b/src/engine.h index 977b0d6ff..0700b2a95 100644 --- a/src/engine.h +++ b/src/engine.h @@ -5,7 +5,7 @@ */ #ifndef RABIT_ENGINE_H #define RABIT_ENGINE_H -#include "./io.h" +#include "./serializable.h" namespace MPI { /*! \brief MPI data type just to be compatible with MPI reduce function*/ @@ -92,8 +92,8 @@ class IEngine { * * \sa CheckPoint, VersionNumber */ - virtual int LoadCheckPoint(utils::ISerializable *global_model, - utils::ISerializable *local_model = NULL) = 0; + virtual int LoadCheckPoint(ISerializable *global_model, + ISerializable *local_model = NULL) = 0; /*! * \brief checkpoint the model, meaning we finished a stage of execution * every time we call check point, there is a version number which will increase by one @@ -110,8 +110,8 @@ class IEngine { * * \sa LoadCheckPoint, VersionNumber */ - virtual void CheckPoint(const utils::ISerializable *global_model, - const utils::ISerializable *local_model = NULL) = 0; + virtual void CheckPoint(const ISerializable *global_model, + const ISerializable *local_model = NULL) = 0; /*! 
* \return version number of current stored model, * which means how many calls to CheckPoint we made so far diff --git a/src/engine_mpi.cc b/src/engine_mpi.cc index 9e5972e1a..870c93fdb 100644 --- a/src/engine_mpi.cc +++ b/src/engine_mpi.cc @@ -34,12 +34,12 @@ class MPIEngine : public IEngine { virtual void InitAfterException(void) { utils::Error("MPI is not fault tolerant"); } - virtual int LoadCheckPoint(utils::ISerializable *global_model, - utils::ISerializable *local_model = NULL) { + virtual int LoadCheckPoint(ISerializable *global_model, + ISerializable *local_model = NULL) { return 0; } - virtual void CheckPoint(const utils::ISerializable *global_model, - const utils::ISerializable *local_model = NULL) { + virtual void CheckPoint(const ISerializable *global_model, + const ISerializable *local_model = NULL) { version_number += 1; } virtual int VersionNumber(void) const { diff --git a/src/io.h b/src/io.h index ed01545f2..699a93c83 100644 --- a/src/io.h +++ b/src/io.h @@ -5,99 +5,14 @@ #include #include #include "./utils.h" +#include "./serializable.h" /*! * \file io.h - * \brief general stream interface for serialization, I/O + * \brief utilities that implements different serializable interface * \author Tianqi Chen */ namespace rabit { namespace utils { -/*! - * \brief interface of stream I/O, used to serialize model - */ -class IStream { - public: - /*! - * \brief read data from stream - * \param ptr pointer to memory buffer - * \param size size of block - * \return usually is the size of data readed - */ - virtual size_t Read(void *ptr, size_t size) = 0; - /*! - * \brief write data to stream - * \param ptr pointer to memory buffer - * \param size size of block - */ - virtual void Write(const void *ptr, size_t size) = 0; - /*! \brief virtual destructor */ - virtual ~IStream(void) {} - - public: - // helper functions to write various of data structures - /*! 
- * \brief binary serialize a vector - * \param vec vector to be serialized - */ - template - inline void Write(const std::vector &vec) { - uint64_t sz = static_cast(vec.size()); - this->Write(&sz, sizeof(sz)); - if (sz != 0) { - this->Write(&vec[0], sizeof(T) * sz); - } - } - /*! - * \brief binary load a vector - * \param out_vec vector to be loaded - * \return whether load is successfull - */ - template - inline bool Read(std::vector *out_vec) { - uint64_t sz; - if (this->Read(&sz, sizeof(sz)) == 0) return false; - out_vec->resize(sz); - if (sz != 0) { - if (this->Read(&(*out_vec)[0], sizeof(T) * sz) == 0) return false; - } - return true; - } - /*! - * \brief binary serialize a string - * \param str the string to be serialized - */ - inline void Write(const std::string &str) { - uint64_t sz = static_cast(str.length()); - this->Write(&sz, sizeof(sz)); - if (sz != 0) { - this->Write(&str[0], sizeof(char) * sz); - } - } - /*! - * \brief binary load a string - * \param out_str string to be loaded - * \return whether load is successful - */ - inline bool Read(std::string *out_str) { - uint64_t sz; - if (this->Read(&sz, sizeof(sz)) == 0) return false; - out_str->resize(sz); - if (sz != 0) { - if (this->Read(&(*out_str)[0], sizeof(char) * sz) == 0) return false; - } - return true; - } -}; - -/*! \brief interface of se*/ -class ISerializable { - public: - /*! \brief load the model from file */ - virtual void Load(IStream &fi) = 0; - /*! \brief save the model to the stream*/ - virtual void Save(IStream &fo) const = 0; -}; - /*! 
\brief interface of i/o stream that support seek */ class ISeekStream: public IStream { public: diff --git a/src/rabit-inl.h b/src/rabit-inl.h index 8d379f920..b6126f47d 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -6,6 +6,8 @@ */ #ifndef RABIT_RABIT_INL_H #define RABIT_RABIT_INL_H +// use engine for implementation +#include "./engine.h" namespace rabit { namespace engine { @@ -139,13 +141,13 @@ inline void Allreduce(DType *sendrecvbuf, size_t count, std::function pr #endif // C++11 // load latest check point -inline int LoadCheckPoint(utils::ISerializable *global_model, - utils::ISerializable *local_model) { +inline int LoadCheckPoint(ISerializable *global_model, + ISerializable *local_model) { return engine::GetEngine()->LoadCheckPoint(global_model, local_model); } // checkpoint the model, meaning we finished a stage of execution -inline void CheckPoint(const utils::ISerializable *global_model, - const utils::ISerializable *local_model) { +inline void CheckPoint(const ISerializable *global_model, + const ISerializable *local_model) { engine::GetEngine()->CheckPoint(global_model, local_model); } // return the version number of currently stored model diff --git a/src/rabit.h b/src/rabit.h index ac17faec6..cc65e62ae 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -6,6 +6,7 @@ * The actual implementation is redirected to rabit engine * Code only using this header can also compiled with MPI Allreduce(with no fault recovery), * + * rabit.h and serializable.h is all the user need to use rabit interface * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ #include @@ -14,9 +15,8 @@ #if __cplusplus >= 201103L #include #endif // C++11 -// rabit headers -#include "./io.h" -#include "./engine.h" +// contains definition of ISerializable +#include "./serializable.h" /*! \brief namespace of rabit */ namespace rabit { @@ -31,7 +31,6 @@ struct Sum; /*! \brief perform bitwise OR */ struct BitOR; } // namespace op - /*! 
* \brief intialize the rabit module, call this once function before using anything * \param argc number of arguments in argv @@ -143,8 +142,8 @@ inline void Allreduce(DType *sendrecvbuf, size_t count, std::function pr * * \sa CheckPoint, VersionNumber */ -inline int LoadCheckPoint(utils::ISerializable *global_model, - utils::ISerializable *local_model = NULL); +inline int LoadCheckPoint(ISerializable *global_model, + ISerializable *local_model = NULL); /*! * \brief checkpoint the model, meaning we finished a stage of execution * every time we call check point, there is a version number which will increase by one @@ -159,8 +158,8 @@ inline int LoadCheckPoint(utils::ISerializable *global_model, * So only CheckPoint with global_model if possible * \sa LoadCheckPoint, VersionNumber */ -inline void CheckPoint(const utils::ISerializable *global_model, - const utils::ISerializable *local_model = NULL); +inline void CheckPoint(const ISerializable *global_model, + const ISerializable *local_model = NULL); /*! * \return version number of current stored model, * which means how many calls to CheckPoint we made so far diff --git a/src/serializable.h b/src/serializable.h new file mode 100644 index 000000000..a269dc1c7 --- /dev/null +++ b/src/serializable.h @@ -0,0 +1,99 @@ +#ifndef RABIT_SERIALIZABLE_H +#define RABIT_SERIALIZABLE_H +#include +#include +#include "./utils.h" +/*! + * \file serializable.h + * \brief defines serializable interface of rabit + * \author Tianqi Chen + */ +namespace rabit { +/*! + * \brief interface of stream I/O, used by ISerializable + * \sa ISerializable + */ +class IStream { + public: + /*! + * \brief read data from stream + * \param ptr pointer to memory buffer + * \param size size of block + * \return usually is the size of data readed + */ + virtual size_t Read(void *ptr, size_t size) = 0; + /*! 
+ * \brief write data to stream + * \param ptr pointer to memory buffer + * \param size size of block + */ + virtual void Write(const void *ptr, size_t size) = 0; + /*! \brief virtual destructor */ + virtual ~IStream(void) {} + + public: + // helper functions to write various of data structures + /*! + * \brief binary serialize a vector + * \param vec vector to be serialized + */ + template + inline void Write(const std::vector &vec) { + uint64_t sz = static_cast(vec.size()); + this->Write(&sz, sizeof(sz)); + if (sz != 0) { + this->Write(&vec[0], sizeof(T) * sz); + } + } + /*! + * \brief binary load a vector + * \param out_vec vector to be loaded + * \return whether load is successfull + */ + template + inline bool Read(std::vector *out_vec) { + uint64_t sz; + if (this->Read(&sz, sizeof(sz)) == 0) return false; + out_vec->resize(sz); + if (sz != 0) { + if (this->Read(&(*out_vec)[0], sizeof(T) * sz) == 0) return false; + } + return true; + } + /*! + * \brief binary serialize a string + * \param str the string to be serialized + */ + inline void Write(const std::string &str) { + uint64_t sz = static_cast(str.length()); + this->Write(&sz, sizeof(sz)); + if (sz != 0) { + this->Write(&str[0], sizeof(char) * sz); + } + } + /*! + * \brief binary load a string + * \param out_str string to be loaded + * \return whether load is successful + */ + inline bool Read(std::string *out_str) { + uint64_t sz; + if (this->Read(&sz, sizeof(sz)) == 0) return false; + out_str->resize(sz); + if (sz != 0) { + if (this->Read(&(*out_str)[0], sizeof(char) * sz) == 0) return false; + } + return true; + } +}; + +/*! \brief interface of se*/ +class ISerializable { + public: + /*! \brief load the model from file */ + virtual void Load(IStream &fi) = 0; + /*! 
\brief save the model to the stream*/ + virtual void Save(IStream &fo) const = 0; +}; +} // namespace rabit +#endif diff --git a/test/mock.h b/test/mock.h index a5ac39c83..17e5b75c9 100644 --- a/test/mock.h +++ b/test/mock.h @@ -33,14 +33,14 @@ public: rabit::Allreduce(sendrecvbuf, count); } -inline int LoadCheckPoint(utils::ISerializable *global_model, - utils::ISerializable *local_model) { +inline int LoadCheckPoint(ISerializable *global_model, + ISerializable *local_model) { utils::Assert(verify(loadCheckpoint), "[%d] error when loading checkpoint", rank); return rabit::LoadCheckPoint(global_model, local_model); } - inline void CheckPoint(const utils::ISerializable *global_model, - const utils::ISerializable *local_model) { + inline void CheckPoint(const ISerializable *global_model, + const ISerializable *local_model) { utils::Assert(verify(checkpoint), "[%d] error when checkpointing", rank); rabit::CheckPoint(global_model, local_model); } diff --git a/test/test_local_recover.cpp b/test/test_local_recover.cpp index d98c6ae48..b9b84f2d1 100644 --- a/test/test_local_recover.cpp +++ b/test/test_local_recover.cpp @@ -29,16 +29,16 @@ inline void CallEnd(const char *fun, int ntrial, int iter) { } // dummy model -class Model : public rabit::utils::ISerializable { +class Model : public rabit::ISerializable { public: // iterations std::vector data; // load from stream - virtual void Load(rabit::utils::IStream &fi) { + virtual void Load(rabit::IStream &fi) { fi.Read(&data); } /*! 
\brief save the model to the stream */ - virtual void Save(rabit::utils::IStream &fo) const { + virtual void Save(rabit::IStream &fo) const { fo.Write(data); } virtual void InitModel(size_t n, float v) { diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index 6feb56dde..aba107a85 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -29,16 +29,16 @@ inline void CallEnd(const char *fun, int ntrial, int iter) { } // dummy model -class Model : public rabit::utils::ISerializable { +class Model : public rabit::ISerializable { public: // iterations std::vector data; // load from stream - virtual void Load(rabit::utils::IStream &fi) { + virtual void Load(rabit::IStream &fi) { fi.Read(&data); } /*! \brief save the model to the stream */ - virtual void Save(rabit::utils::IStream &fo) const { + virtual void Save(rabit::IStream &fo) const { fo.Write(data); } virtual void InitModel(size_t n) { diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index c08e50c23..bbd5067af 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -8,18 +8,18 @@ using namespace rabit; // kmeans model -class Model : public rabit::utils::ISerializable { +class Model : public rabit::ISerializable { public: // matrix of centroids Matrix centroids; // load from stream - virtual void Load(rabit::utils::IStream &fi) { + virtual void Load(rabit::IStream &fi) { fi.Read(¢roids.nrow, sizeof(centroids.nrow)); fi.Read(¢roids.ncol, sizeof(centroids.ncol)); fi.Read(¢roids.data); } /*! 
\brief save the model to the stream */ - virtual void Save(rabit::utils::IStream &fo) const { + virtual void Save(rabit::IStream &fo) const { fo.Write(¢roids.nrow, sizeof(centroids.nrow)); fo.Write(¢roids.ncol, sizeof(centroids.ncol)); fo.Write(centroids.data); diff --git a/toolkit/toolkit_util.h b/toolkit/toolkit_util.h index cff7b7fe0..a2f8f56ac 100644 --- a/toolkit/toolkit_util.h +++ b/toolkit/toolkit_util.h @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace rabit { From 6151899ce22d341511e7996ca09aa3235098a8b5 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 19 Dec 2014 18:40:06 -0800 Subject: [PATCH 124/531] add tracker print --- Makefile | 8 ++-- src/allreduce_base.cc | 46 ++++++++++--------- src/allreduce_base.h | 12 +++++ src/engine.h | 7 +++ src/engine_empty.cc | 90 +++++++++++++++++++++++++++++++++++++ src/engine_mpi.cc | 8 +++- src/rabit-inl.h | 16 +++++++ src/rabit.h | 17 +++++++ src/utils.h | 9 ---- test/speed_test.cpp | 4 +- test/test_local_recover.cpp | 12 ++--- test/test_model_recover.cpp | 12 ++--- tracker/rabit_tracker.py | 28 +++++++----- 13 files changed, 210 insertions(+), 59 deletions(-) create mode 100644 src/engine_empty.cc diff --git a/Makefile b/Makefile index bbed21c81..a591600a8 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,8 @@ export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src BPATH=lib # objectives that makes up rabit library MPIOBJ= $(BPATH)/engine_mpi.o -OBJ= $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o -ALIB= lib/librabit.a lib/librabit_mpi.a +OBJ= $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o $(BPATH)/engine_empty.o +ALIB= lib/librabit.a lib/librabit_mpi.a lib/librabit_empty.a .PHONY: clean all @@ -18,8 +18,10 @@ $(BPATH)/allreduce_base.o: src/allreduce_base.cc src/*.h $(BPATH)/engine.o: src/engine.cc src/*.h $(BPATH)/allreduce_robust.o: src/allreduce_robust.cc src/*.h $(BPATH)/engine_mpi.o: src/engine_mpi.cc src/*.h 
+$(BPATH)/engine_empty.o: src/engine_empty.cc src/*.h -lib/librabit.a: $(OBJ) +lib/librabit.a: $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o +lib/librabit_empty.a: $(BPATH)/engine_empty.o lib/librabit_mpi.a: $(MPIOBJ) $(OBJ) : diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index d2ab14daa..b8b3ed0de 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -67,24 +67,21 @@ void AllreduceBase::Shutdown(void) { tree_links.plinks.clear(); if (tracker_uri == "NULL") return; - int magic = kMagic; // notify tracker rank i have shutdown - utils::TCPSocket tracker; - tracker.Create(); - if (!tracker.Connect(utils::SockAddr(tracker_uri.c_str(), tracker_port))) { - utils::Socket::Error("Connect Tracker"); - } - utils::Assert(tracker.SendAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 1"); - utils::Assert(tracker.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); - utils::Check(magic == kMagic, "sync::Invalid tracker message, init failure"); - - utils::Assert(tracker.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); - utils::Assert(tracker.SendAll(&world_size, sizeof(world_size)) == sizeof(world_size), "ReConnectLink failure 3"); - tracker.SendStr(task_id); + utils::TCPSocket tracker = this->ConnectTracker(); tracker.SendStr(std::string("shutdown")); tracker.Close(); utils::TCPSocket::Finalize(); } +void AllreduceBase::TrackerPrint(const std::string &msg) { + if (tracker_uri == "NULL") { + utils::Printf("%s", msg.c_str()); return; + } + utils::TCPSocket tracker = this->ConnectTracker(); + tracker.SendStr(std::string("print")); + tracker.SendStr(msg); + tracker.Close(); +} /*! * \brief set parameters to the engine * \param name parameter name @@ -113,14 +110,10 @@ void AllreduceBase::SetParam(const char *name, const char *val) { } } /*! 
- * \brief connect to the tracker to fix the the missing links - * this function is also used when the engine start up + * \brief initialize connection to the tracker + * \return a socket that initializes the connection */ -void AllreduceBase::ReConnectLinks(const char *cmd) { - // single node mode - if (tracker_uri == "NULL") { - rank = 0; world_size = 1; return; - } +utils::TCPSocket AllreduceBase::ConnectTracker(void) const { int magic = kMagic; // get information from tracker utils::TCPSocket tracker; @@ -134,7 +127,20 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { utils::Assert(tracker.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); utils::Assert(tracker.SendAll(&world_size, sizeof(world_size)) == sizeof(world_size), "ReConnectLink failure 3"); tracker.SendStr(task_id); + return tracker; +} +/*! + * \brief connect to the tracker to fix the the missing links + * this function is also used when the engine start up + */ +void AllreduceBase::ReConnectLinks(const char *cmd) { + // single node mode + if (tracker_uri == "NULL") { + rank = 0; world_size = 1; return; + } + utils::TCPSocket tracker = this->ConnectTracker(); tracker.SendStr(std::string(cmd)); + // the rank of previous link, next link in ring int prev_rank, next_rank; // the rank of neighbors diff --git a/src/allreduce_base.h b/src/allreduce_base.h index e313cab88..8bcc76781 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -45,6 +45,13 @@ class AllreduceBase : public IEngine { * \param val parameter value */ virtual void SetParam(const char *name, const char *val); + /*! + * \brief print the msg in the tracker, + * this function can be used to communicate the information of the progress to + * the user who monitors the tracker + * \param msg message to be printed in the tracker + */ + virtual void TrackerPrint(const std::string &msg); /*! 
\brief get rank */ virtual int GetRank(void) const { return rank; @@ -279,6 +286,11 @@ class AllreduceBase : public IEngine { return plinks.size(); } }; + /*! + * \brief initialize connection to the tracker + * \return a socket that initializes the connection + */ + utils::TCPSocket ConnectTracker(void) const; /*! * \brief connect to the tracker to fix the the missing links * this function is also used when the engine start up diff --git a/src/engine.h b/src/engine.h index 0700b2a95..891290ae0 100644 --- a/src/engine.h +++ b/src/engine.h @@ -124,6 +124,13 @@ class IEngine { virtual int GetWorldSize(void) const = 0; /*! \brief get the host name of current node */ virtual std::string GetHost(void) const = 0; + /*! + * \brief print the msg in the tracker, + * this function can be used to communicate the information of the progress to + * the user who monitors the tracker + * \param msg message to be printed in the tracker + */ + virtual void TrackerPrint(const std::string &msg) = 0; }; /*! \brief intiialize the engine module */ diff --git a/src/engine_empty.cc b/src/engine_empty.cc new file mode 100644 index 000000000..a2cbd2358 --- /dev/null +++ b/src/engine_empty.cc @@ -0,0 +1,90 @@ +/*! + * \file engine_empty.cc + * \brief this file provides a dummy implementation of engine that does nothing + * this file provides a way to fall back to single node program without causing too many dependencies + * This is usually NOT needed, use engine_mpi or engine for real distributed version + * \author Tianqi Chen + */ +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE +#define NOMINMAX + +#include "./engine.h" +namespace rabit { +namespace engine { +/*! 
\brief EmptyEngine */ +class EmptyEngine : public IEngine { + public: + EmptyEngine(void) { + version_number = 0; + } + virtual void Allreduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer, + PreprocFunction prepare_fun, + void *prepare_arg) { + utils::Error("EmptyEngine:: Allreduce is not supported, use Allreduce_ instead"); + } + virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) { + } + virtual void InitAfterException(void) { + utils::Error("EmptyEngine is not fault tolerant"); + } + virtual int LoadCheckPoint(ISerializable *global_model, + ISerializable *local_model = NULL) { + return 0; + } + virtual void CheckPoint(const ISerializable *global_model, + const ISerializable *local_model = NULL) { + version_number += 1; + } + virtual int VersionNumber(void) const { + return version_number; + } + /*! \brief get rank of current node */ + virtual int GetRank(void) const { + return 0; + } + /*! \brief get total number of */ + virtual int GetWorldSize(void) const { + return 1; + } + /*! \brief get the host name of current node */ + virtual std::string GetHost(void) const { + return std::string(""); + } + virtual void TrackerPrint(const std::string &msg) { + // simply print information into the tracker + utils::Printf("%s", msg.c_str()); + } + private: + int version_number; +}; + +// singleton sync manager +EmptyEngine manager; + +/*! \brief intiialize the synchronization module */ +void Init(int argc, char *argv[]) { +} +/*! \brief finalize syncrhonization module */ +void Finalize(void) { +} + +/*! 
\brief singleton method to get engine */ +IEngine *GetEngine(void) { + return &manager; +} +// perform in-place allreduce, on sendrecvbuf +void Allreduce_(void *sendrecvbuf, + size_t type_nbytes, + size_t count, + IEngine::ReduceFunction red, + mpi::DataType dtype, + mpi::OpType op, + IEngine::PreprocFunction prepare_fun, + void *prepare_arg) { +} +} // namespace engine +} // namespace rabit diff --git a/src/engine_mpi.cc b/src/engine_mpi.cc index 870c93fdb..7bf1fa2b6 100644 --- a/src/engine_mpi.cc +++ b/src/engine_mpi.cc @@ -8,6 +8,7 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX +#include #include "./engine.h" #include "./utils.h" #include @@ -61,7 +62,12 @@ class MPIEngine : public IEngine { name[len] = '\0'; return std::string(name); } - + virtual void TrackerPrint(const std::string &msg) { + // simply print information into the tracker + if (GetRank() == 0) { + utils::Printf("%s", msg.c_str()); + } + } private: int version_number; }; diff --git a/src/rabit-inl.h b/src/rabit-inl.h index b6126f47d..8d681d32c 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -8,6 +8,7 @@ #define RABIT_RABIT_INL_H // use engine for implementation #include "./engine.h" +#include "./utils.h" namespace rabit { namespace engine { @@ -140,6 +141,21 @@ inline void Allreduce(DType *sendrecvbuf, size_t count, std::function pr } #endif // C++11 +// print message to the tracker +inline void TrackerPrint(const std::string &msg) { + engine::GetEngine()->TrackerPrint(msg); +} +#ifndef RABIT_STRICT_CXX98_ +inline void TrackerPrintf(const char *fmt, ...) 
{ + const int kPrintBuffer = 1 << 10; + std::string msg(kPrintBuffer, '\0'); + va_list args; + va_start(args, fmt); + vsnprintf(&msg[0], kPrintBuffer, fmt, args); + va_end(args); + TrackerPrint(msg); +} +#endif // load latest check point inline int LoadCheckPoint(ISerializable *global_model, ISerializable *local_model) { diff --git a/src/rabit.h b/src/rabit.h index cc65e62ae..bdf80e259 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -47,6 +47,23 @@ inline int GetRank(void); inline int GetWorldSize(void); /*! \brief get name of processor */ inline std::string GetProcessorName(void); +/*! + * \brief print the msg to the tracker, + * this function can be used to communicate the information of the progress to + * the user who monitors the tracker + * \param msg, the message to be printed + */ +inline void TrackerPrint(const std::string &msg); +#ifndef RABIT_STRICT_CXX98_ +/*! + * \brief print the msg to the tracker, this function may not be available + * in very strict c++98 compilers, but is available most of the time + * this function can be used to communicate the information of the progress to + * the user who monitors the tracker + * \param fmt the format string + */ +inline void TrackerPrintf(const char *fmt, ...); +#endif /*! * \brief broadcast an memory region to all others from root * Example: int a = 1; Broadcast(&a, sizeof(a), root); diff --git a/src/utils.h b/src/utils.h index e1b34fe2e..beae6589f 100644 --- a/src/utils.h +++ b/src/utils.h @@ -106,15 +106,6 @@ inline void Printf(const char *fmt, ...) { va_end(args); HandlePrint(msg.c_str()); } -/*! \brief printf, print message to the console */ -inline void LogPrintf(const char *fmt, ...) { - std::string msg(kPrintBuffer, '\0'); - va_list args; - va_start(args, fmt); - vsnprintf(&msg[0], kPrintBuffer, fmt, args); - va_end(args); - HandleLogPrint(msg.c_str()); -} /*! \brief portable version of snprintf */ inline int SPrintf(char *buf, size_t size, const char *fmt, ...) 
{ va_list args; diff --git a/test/speed_test.cpp b/test/speed_test.cpp index 8f7fc68bf..e716731fd 100644 --- a/test/speed_test.cpp +++ b/test/speed_test.cpp @@ -60,11 +60,11 @@ inline void PrintStats(const char *name, double tdiff, int n, int nrep, size_t s rabit::Allreduce(&tsqr, 1); double tstd = sqrt(tsqr / nproc); if (rabit::GetRank() == 0) { - utils::LogPrintf("%s: mean=%g, std=%g sec\n", name, tavg, tstd); + rabit::TrackerPrintf("%s: mean=%g, std=%g sec\n", name, tavg, tstd); double ndata = n; ndata *= nrep * size; if (n != 0) { - utils::LogPrintf("%s-speed: %g MB/sec\n", name, (ndata / tavg) / 1024 / 1024 ); + rabit::TrackerPrintf("%s-speed: %g MB/sec\n", name, (ndata / tavg) / 1024 / 1024 ); } } } diff --git a/test/test_local_recover.cpp b/test/test_local_recover.cpp index b9b84f2d1..d473345b3 100644 --- a/test/test_local_recover.cpp +++ b/test/test_local_recover.cpp @@ -147,22 +147,22 @@ int main(int argc, char *argv[]) { if (iter == 0) { model.InitModel(n, 1.0f); local.InitModel(n, 1.0f + rank); - utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); + printf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } else { - utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); + printf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } for (int r = iter; r < 3; ++r) { TestMax(&model, &local, ntrial, r); - utils::LogPrintf("[%d] !!!TestMax pass, iter=%d\n", rank, r); + printf("[%d] !!!TestMax pass, iter=%d\n", rank, r); int step = std::max(nproc / 3, 1); for (int i = 0; i < nproc; i += step) { TestBcast(n, i, ntrial, r); } - utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); + printf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); TestSum(&model, &local, ntrial, r); - utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); + printf("[%d] !!!TestSum pass, iter=%d\n", rank, r); rabit::CheckPoint(&model, &local); - utils::LogPrintf("[%d] !!!CheckPont pass, iter=%d\n", rank, 
r); + printf("[%d] !!!CheckPont pass, iter=%d\n", rank, r); } break; } catch (MockException &e) { diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index aba107a85..117acef09 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -136,22 +136,22 @@ int main(int argc, char *argv[]) { int iter = rabit::LoadCheckPoint(&model); if (iter == 0) { model.InitModel(n); - utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); + printf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } else { - utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); + printf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } for (int r = iter; r < 3; ++r) { TestMax(&model, ntrial, r); - utils::LogPrintf("[%d] !!!TestMax pass, iter=%d\n", rank, r); + printf("[%d] !!!TestMax pass, iter=%d\n", rank, r); int step = std::max(nproc / 3, 1); for (int i = 0; i < nproc; i += step) { TestBcast(n, i, ntrial, r); } - utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); + printf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); TestSum(&model, ntrial, r); - utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); + printf("[%d] !!!TestSum pass, iter=%d\n", rank, r); rabit::CheckPoint(&model); - utils::LogPrintf("[%d] !!!CheckPont pass, iter=%d\n", rank, r); + printf("[%d] !!!CheckPont pass, iter=%d\n", rank, r); } break; } catch (MockException &e) { diff --git a/tracker/rabit_tracker.py b/tracker/rabit_tracker.py index 8e05b4b5a..0322edf5b 100644 --- a/tracker/rabit_tracker.py +++ b/tracker/rabit_tracker.py @@ -188,6 +188,11 @@ class Tracker: rnext = (r + 1) % nslave ring_map[rlst[r]] = (rlst[rprev], rlst[rnext]) return ring_map + def handle_print(self,slave, msg): + sys.stdout.write(msg) + def log_print(self, msg): + sys.stderr.write(msg+'\n') + def accept_slaves(self, nslave): # set of nodes that finishs the job shutdown = {} @@ -202,12 +207,16 @@ class Tracker: while 
len(shutdown) != nslave: fd, s_addr = self.sock.accept() - s = SlaveEntry(fd, s_addr) + s = SlaveEntry(fd, s_addr) + if s.cmd == 'print': + msg = s.sock.recvstr() + self.handle_print(s, msg) + continue if s.cmd == 'shutdown': assert s.rank >= 0 and s.rank not in shutdown assert s.rank not in wait_conn shutdown[s.rank] = s - print 'Recieve %s signal from %d' % (s.cmd, s.rank) + self.log_print('Recieve %s signal from %d' % (s.cmd, s.rank)) continue assert s.cmd == 'start' or s.cmd == 'recover' # lazily initialize the slaves @@ -233,21 +242,16 @@ class Tracker: job_map[s.jobid] = rank s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map) if s.cmd != 'start': - print 'Recieve %s signal from %d' % (s.cmd, s.rank) + self.log_print('Recieve %s signal from %d' % (s.cmd, s.rank)) else: - print 'Recieve %s signal from %s assign rank %d' % (s.cmd, s.host, s.rank) + self.log_print('Recieve %s signal from %s assign rank %d' % (s.cmd, s.host, s.rank)) if s.wait_accept > 0: wait_conn[rank] = s - print 'All nodes finishes job' + self.log_print('All nodes finishes job') -def mpi_submit(nslave, args): - cmd = ' '.join(['mpirun -n %d' % nslave] + args) - print cmd - return subprocess.check_call(cmd, shell = True) - -def submit(nslave, args, fun_submit = mpi_submit): +def submit(nslave, args, fun_submit): master = Tracker() submit_thread = Thread(target = fun_submit, args = (nslave, args + master.slave_args())) submit_thread.start() - master.accept_slaves(nslaves) + master.accept_slaves(nslave) submit_thread.join() From 2c0a0671ad94225ae6084223e55f377bd50daa6b Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 19 Dec 2014 19:21:21 -0800 Subject: [PATCH 125/531] skip actions when there is only 1 node --- src/allreduce_robust.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 538609c62..7afd3546d 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -68,6 +68,8 @@ void 
AllreduceRobust::Allreduce(void *sendrecvbuf_, ReduceFunction reducer, PreprocFunction prepare_fun, void *prepare_arg) { + // skip action in single node + if (world_size == 1) return; bool recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter); // now we are free to remove the last result, if any if (resbuf.LastSeqNo() != -1 && @@ -98,6 +100,8 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_, * \param root the root worker id to broadcast the data */ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) { + // skip action in single node + if (world_size == 1) return; bool recovered = RecoverExec(sendrecvbuf_, total_size, 0, seq_counter); // now we are free to remove the last result, if any if (resbuf.LastSeqNo() != -1 && @@ -143,6 +147,8 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) */ int AllreduceRobust::LoadCheckPoint(ISerializable *global_model, ISerializable *local_model) { + // skip action in single node + if (world_size == 1) return 0; if (num_local_replica == 0) { utils::Check(local_model == NULL, "need to set num_local_replica larger than 1 to checkpoint local_model"); } @@ -200,6 +206,10 @@ int AllreduceRobust::LoadCheckPoint(ISerializable *global_model, */ void AllreduceRobust::CheckPoint(const ISerializable *global_model, const ISerializable *local_model) { + // never do check point in single machine mode + if (world_size == 1) { + version_number += 1; return; + } if (num_local_replica == 0) { utils::Check(local_model == NULL, "need to set num_local_replica larger than 1 to checkpoint local_model"); } From e72a869fd18e6fea41ad02ba925e4ab3f82c8b84 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 19 Dec 2014 20:57:53 -0800 Subject: [PATCH 126/531] add complex reducer in --- src/engine.cc | 22 ++++++++++++++ src/engine.h | 42 +++++++++++++++++++++++++++ src/engine_empty.cc | 14 +++++++++ src/engine_mpi.cc | 54 ++++++++++++++++++++++++++++++++++ src/rabit-inl.h | 67 
++++++++++++++++++++++++++++++++++++++++++ src/rabit.h | 71 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 270 insertions(+) diff --git a/src/engine.cc b/src/engine.cc index cc6a48745..d6d6c92b6 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -48,5 +48,27 @@ void Allreduce_(void *sendrecvbuf, void *prepare_arg) { GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun, prepare_arg); } + +// code for reduce handle +ReduceHandle::ReduceHandle(void) : handle_(NULL), htype_(NULL) { +} +ReduceHandle::~ReduceHandle(void) {} + +int ReduceHandle::TypeSize(const MPI::Datatype &dtype) { + return static_cast(dtype.type_size); +} +void ReduceHandle::Init(IEngine::ReduceFunction redfunc, size_t type_nbytes) { + utils::Assert(handle_ == NULL, "cannot initialize reduce handle twice"); + handle_ = reinterpret_cast(redfunc); +} +void ReduceHandle::Allreduce(void *sendrecvbuf, + size_t type_nbytes, size_t count, + IEngine::PreprocFunction prepare_fun, + void *prepare_arg) { + utils::Assert(handle_ != NULL, "must intialize handle to call AllReduce"); + GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, + reinterpret_cast(handle_), + prepare_fun, prepare_arg); +} } // namespace engine } // namespace rabit diff --git a/src/engine.h b/src/engine.h index 891290ae0..03cd8e44a 100644 --- a/src/engine.h +++ b/src/engine.h @@ -177,6 +177,48 @@ void Allreduce_(void *sendrecvbuf, mpi::OpType op, IEngine::PreprocFunction prepare_fun = NULL, void *prepare_arg = NULL); + +/*! + * \brief handle for customized reducer, used to handle customized reduce + * this class is mainly created for compatiblity issue with MPI's customized reduce + */ +class ReduceHandle { + public: + // constructor + ReduceHandle(void); + // destructor + ~ReduceHandle(void); + /*! 
+ * \brief initialize the reduce function, + * with the type the reduce function need to deal with + * the reduce function MUST be communicative + */ + void Init(IEngine::ReduceFunction redfunc, size_t type_nbytes); + /*! + * \brief customized in-place all reduce operation + * \param sendrecvbuf the in place send-recv buffer + * \param type_n4bytes unit size of the type, in terms of 4bytes + * \param count number of elements to send + * \param prepare_func Lazy preprocessing function, lazy prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. + * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \param prepare_arg argument used to passed into the lazy preprocessing function + */ + void Allreduce(void *sendrecvbuf, + size_t type_nbytes, size_t count, + IEngine::PreprocFunction prepare_fun = NULL, + void *prepare_arg = NULL); + /*! \return the number of bytes occupied by the type */ + static int TypeSize(const MPI::Datatype &dtype); + + private: + // handle data field + void *handle_; + // handle to the type field + void *htype_; + // the created type in 4 bytes + size_t created_type_nbytes_; +}; } // namespace engine } // namespace rabit #endif // RABIT_ENGINE_H diff --git a/src/engine_empty.cc b/src/engine_empty.cc index a2cbd2358..be37e3a7a 100644 --- a/src/engine_empty.cc +++ b/src/engine_empty.cc @@ -86,5 +86,19 @@ void Allreduce_(void *sendrecvbuf, IEngine::PreprocFunction prepare_fun, void *prepare_arg) { } + +// code for reduce handle +ReduceHandle::ReduceHandle(void) : handle_(NULL), htype_(NULL) { +} +ReduceHandle::~ReduceHandle(void) {} + +int ReduceHandle::TypeSize(const MPI::Datatype &dtype) { + return 0; +} +void ReduceHandle::Init(IEngine::ReduceFunction redfunc, size_t type_nbytes) {} +void ReduceHandle::Allreduce(void *sendrecvbuf, + size_t type_nbytes, size_t count, + IEngine::PreprocFunction prepare_fun, + void *prepare_arg) {} } 
// namespace engine } // namespace rabit diff --git a/src/engine_mpi.cc b/src/engine_mpi.cc index 7bf1fa2b6..46867d3cc 100644 --- a/src/engine_mpi.cc +++ b/src/engine_mpi.cc @@ -124,5 +124,59 @@ void Allreduce_(void *sendrecvbuf, if (prepare_fun != NULL) prepare_fun(prepare_arg); MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, GetType(dtype), GetOp(op)); } + +// code for reduce handle +ReduceHandle::ReduceHandle(void) : handle_(NULL), htype_(NULL) { +} +ReduceHandle::~ReduceHandle(void) { + if (handle_ != NULL) { + MPI::Op *op = reinterpret_cast(handle_); + op->Free(); + delete op; + } + if (htype_ != NULL) { + MPI::Datatype *dtype = reinterpret_cast(htype_); + dtype->Free(); + delete dtype; + } +} +int ReduceHandle::TypeSize(const MPI::Datatype &dtype) { + return dtype.Get_size(); +} +void ReduceHandle::Init(IEngine::ReduceFunction redfunc, size_t type_nbytes) { + utils::Assert(handle_ == NULL, "cannot initialize reduce handle twice"); + if (type_nbytes != 0) { + MPI::Datatype *dtype = new MPI::Datatype(); + *dtype = MPI::CHAR.Create_contiguous(type_nbytes); + dtype->Commit(); + created_type_nbytes_ = type_nbytes; + htype_ = dtype; + } + + MPI::Op *op = new MPI::Op(); + MPI::User_function *pf = redfunc; + op->Init(pf, true); + handle_ = op; +} +void ReduceHandle::Allreduce(void *sendrecvbuf, + size_t type_nbytes, size_t count, + IEngine::PreprocFunction prepare_fun, + void *prepare_arg) { + utils::Assert(handle_ != NULL, "must intialize handle to call AllReduce"); + MPI::Op *op = reinterpret_cast(handle_); + MPI::Datatype *dtype = reinterpret_cast(htype_); + if (created_type_nbytes_ != type_nbytes || dtype == NULL) { + if (dtype == NULL) { + dtype = new MPI::Datatype(); + } else { + dtype->Free(); + } + *dtype = MPI::CHAR.Create_contiguous(type_nbytes); + dtype->Commit(); + created_type_nbytes_ = type_nbytes; + } + if (prepare_fun != NULL) prepare_fun(prepare_arg); + MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, *dtype, *op); +} } // 
namespace engine } // namespace rabit diff --git a/src/rabit-inl.h b/src/rabit-inl.h index 8d681d32c..679f6d49e 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -8,6 +8,7 @@ #define RABIT_RABIT_INL_H // use engine for implementation #include "./engine.h" +#include "./io.h" #include "./utils.h" namespace rabit { @@ -170,5 +171,71 @@ inline void CheckPoint(const ISerializable *global_model, inline int VersionNumber(void) { return engine::GetEngine()->VersionNumber(); } +// --------------------------------- +// Code to handle customized Reduce +// --------------------------------- +// function to perform reduction for Reducer +template +inline void Reducer::ReduceFunc(const void *src_, void *dst_, int len_, const MPI::Datatype &dtype) { + const size_t kUnit = sizeof(DType); + const char *psrc = reinterpret_cast(src_); + char *pdst = reinterpret_cast(dst_); + DType tdst, tsrc; + for (size_t i = 0; i < len_; ++i) { + // use memcpy to avoid alignment issue + std::memcpy(&tdst, pdst + i * kUnit, sizeof(tdst)); + std::memcpy(&tsrc, psrc + i * kUnit, sizeof(tsrc)); + tdst.Reduce(tsrc); + std::memcpy(pdst + i * kUnit, &tdst, sizeof(tdst)); + } +} +template +inline Reducer::Reducer(void) { + handle_.Init(Reducer::ReduceFunc, sizeof(DType)); +} +template +inline void Reducer::Allreduce(DType *sendrecvbuf, size_t count, + void (*prepare_fun)(void *arg), + void *prepare_arg) { + handle_.Allreduce(sendrecvbuf, sizeof(DType), count, prepare_fun, prepare_arg); +} +// function to perform reduction for SerializeReducer +template +inline void +SerializeReducer::ReduceFunc(const void *src_, void *dst_, int len_, const MPI::Datatype &dtype) { + int nbytes = engine::ReduceHandle::TypeSize(dtype); + // temp space + DType tsrc, tdst; + for (int i = 0; i < len_; ++i) { + utils::MemoryFixSizeBuffer fsrc((char*)(src_) + i * nbytes, nbytes); + utils::MemoryFixSizeBuffer fdst((char*)(dst_) + i * nbytes, nbytes); + tsrc.Load(fsrc); + tdst.Load(fdst); + // govern const check + 
tdst.Reduce(static_cast(tsrc), nbytes); + fdst.Seek(0); + tdst.Save(fdst); + } +} +template +inline SerializeReducer::SerializeReducer(void) { + handle_.Init(SerializeReducer::ReduceFunc, sizeof(DType)); +} +template +inline void SerializeReducer::Allreduce(DType *sendrecvobj, + size_t max_nbyte, size_t count, + void (*prepare_fun)(void *arg), + void *prepare_arg) { + buffer_.resize(max_nbyte); + for (size_t i = 0; i < count; ++i) { + utils::MemoryFixSizeBuffer fs(BeginPtr(buffer_) + i * max_nbyte, max_nbyte); + sendrecvobj[i].Save(fs); + } + handle_.Allreduce(BeginPtr(buffer_), max_nbyte, count, prepare_fun, prepare_arg); + for (size_t i = 0; i < count; ++i) { + utils::MemoryFixSizeBuffer fs(BeginPtr(buffer_) + i * max_nbyte, max_nbyte); + sendrecvobj[i].Load(fs); + } +} } // namespace rabit #endif diff --git a/src/rabit.h b/src/rabit.h index bdf80e259..316da65c9 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -183,6 +183,77 @@ inline void CheckPoint(const ISerializable *global_model, * \sa LoadCheckPoint, CheckPoint */ inline int VersionNumber(void); +// ----- extensions that allow customized reducer ------ +// helper class to do customized reduce, user do not need to know the type +namespace engine { +class ReduceHandle; +} // namespace engine +/*! + * \brief template class to make customized reduce and all reduce easy + * Do not use reducer directly in the function you call Finalize, because the destructor can happen after Finalize + * \tparam DType data type that to be reduced + * DType must be a struct, with no pointer, and contains a function Reduce(const DType &d); + */ +template +class Reducer { + public: + Reducer(void); + /*! 
+ * \brief customized in-place all reduce operation + * \param sendrecvbuf the in place send-recv buffer + * \param count number of elements to be reduced + * \param prepare_func Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. + * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \param prepare_arg argument used to passed into the lazy preprocessing function + */ + inline void Allreduce(DType *sendrecvbuf, size_t count, + void (*prepare_fun)(void *arg) = NULL, + void *prepare_arg = NULL); + + private: + // inner implementation of reducer + inline static void ReduceFunc(const void *src_, void *dst_, int len_, const MPI::Datatype &dtype); + /*! \brief function handle to do reduce */ + engine::ReduceHandle handle_; +}; +/*! + * \brief template class to make customized reduce, + * this class defines complex reducer handles all the data structure that can be + * serialized/deserialzed into fixed size buffer + * Do not use reducer directly in the function you call Finalize, because the destructor can happen after Finalize + * + * \tparam DType data type that to be reduced, DType must contain following functions: + * (1) Save(IStream &fs) (2) Load(IStream &fs) (3) Reduce(const DType &d); + */ +template +class SerializeReducer { + public: + SerializeReducer(void); + /*! + * \brief customized in-place all reduce operation + * \param sendrecvobj pointer to the array of objects to be reduced + * \param max_nbyte maximum amount of memory needed to serialize each object + * this includes budget limit for intermediate and final result + * \param count number of elements to be reduced + * \param prepare_func Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. 
+ * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called + * \param prepare_arg argument used to passed into the lazy preprocessing function + */ + inline void Allreduce(DType *sendrecvobj, + size_t max_nbyte, size_t count, + void (*prepare_fun)(void *arg) = NULL, + void *prepare_arg = NULL); + + private: + // inner implementation of reducer + inline static void ReduceFunc(const void *src_, void *dst_, int len_, const MPI::Datatype &dtype); + /*! \brief function handle to do reduce */ + engine::ReduceHandle handle_; + /*! \brief temporal buffer used to do reduce*/ + std::string buffer_; +}; } // namespace rabit // implementation of template functions #include "./rabit-inl.h" From 5570e7ceae742cef037c6562796ef167496f5758 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 19 Dec 2014 21:12:10 -0800 Subject: [PATCH 127/531] add complex types --- src/rabit-inl.h | 24 ++++++++++++++++++------ src/rabit.h | 34 ++++++++++++++++++++++++++++------ 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/src/rabit-inl.h b/src/rabit-inl.h index 679f6d49e..54e2c05d5 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -7,7 +7,6 @@ #ifndef RABIT_RABIT_INL_H #define RABIT_RABIT_INL_H // use engine for implementation -#include "./engine.h" #include "./io.h" #include "./utils.h" @@ -176,7 +175,7 @@ inline int VersionNumber(void) { // --------------------------------- // function to perform reduction for Reducer template -inline void Reducer::ReduceFunc(const void *src_, void *dst_, int len_, const MPI::Datatype &dtype) { +inline void ReducerFunc_(const void *src_, void *dst_, int len_, const MPI::Datatype &dtype) { const size_t kUnit = sizeof(DType); const char *psrc = reinterpret_cast(src_); char *pdst = reinterpret_cast(dst_); @@ -191,7 +190,7 @@ inline void Reducer::ReduceFunc(const void *src_, void *dst_, int len_, c } template inline Reducer::Reducer(void) { - handle_.Init(Reducer::ReduceFunc, sizeof(DType)); + 
this->handle_.Init(ReducerFunc_, sizeof(DType)); } template inline void Reducer::Allreduce(DType *sendrecvbuf, size_t count, @@ -201,8 +200,7 @@ inline void Reducer::Allreduce(DType *sendrecvbuf, size_t count, } // function to perform reduction for SerializeReducer template -inline void -SerializeReducer::ReduceFunc(const void *src_, void *dst_, int len_, const MPI::Datatype &dtype) { +inline void SerializeReducerFunc_(const void *src_, void *dst_, int len_, const MPI::Datatype &dtype) { int nbytes = engine::ReduceHandle::TypeSize(dtype); // temp space DType tsrc, tdst; @@ -219,7 +217,7 @@ SerializeReducer::ReduceFunc(const void *src_, void *dst_, int len_, cons } template inline SerializeReducer::SerializeReducer(void) { - handle_.Init(SerializeReducer::ReduceFunc, sizeof(DType)); + handle_.Init(SerializeReducerFunc_, sizeof(DType)); } template inline void SerializeReducer::Allreduce(DType *sendrecvobj, @@ -237,5 +235,19 @@ inline void SerializeReducer::Allreduce(DType *sendrecvobj, sendrecvobj[i].Load(fs); } } + +#if __cplusplus >= 201103L +template +inline void Reducer::Allreduce(DType *sendrecvbuf, size_t count, + std::function prepare_fun) { + this->AllReduce(sendrecvbuf, count, InvokeLambda_, &prepare_fun); +} +template +inline void SerializeReducer::Allreduce(DType *sendrecvobj, + size_t max_nbytes, size_t count, + std::function prepare_fun) { + this->AllReduce(sendrecvobj, count, max_nbytes, InvokeLambda_, &prepare_fun); +} +#endif } // namespace rabit #endif diff --git a/src/rabit.h b/src/rabit.h index 316da65c9..f5c94e1c9 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -17,6 +17,10 @@ #endif // C++11 // contains definition of ISerializable #include "./serializable.h" +// engine definition of rabit, defines internal implementation +// to use rabit interface, there is no need to read engine.h rabit.h and serializable.h +// is suffice to use the interface +#include "./engine.h" /*! 
\brief namespace of rabit */ namespace rabit { @@ -210,10 +214,17 @@ class Reducer { inline void Allreduce(DType *sendrecvbuf, size_t count, void (*prepare_fun)(void *arg) = NULL, void *prepare_arg = NULL); - +#if __cplusplus >= 201103L + /*! + * \brief customized in-place all reduce operation, with lambda function as preprocessor + * \param sendrecvbuf pointer to the array of objects to be reduced + * \param count number of elements to be reduced + * \param prepare_fun lambda function executed to prepare the data, if necessary + */ + inline void Allreduce(DType *sendrecvbuf, size_t count, + std::function prepare_fun); +#endif private: - // inner implementation of reducer - inline static void ReduceFunc(const void *src_, void *dst_, int len_, const MPI::Datatype &dtype); /*! \brief function handle to do reduce */ engine::ReduceHandle handle_; }; @@ -245,10 +256,21 @@ class SerializeReducer { size_t max_nbyte, size_t count, void (*prepare_fun)(void *arg) = NULL, void *prepare_arg = NULL); - +// C++11 support for lambda prepare function +#if __cplusplus >= 201103L + /*! + * \brief customized in-place all reduce operation, with lambda function as preprocessor + * \param sendrecvobj pointer to the array of objects to be reduced + * \param max_nbyte maximum amount of memory needed to serialize each object + * this includes budget limit for intermediate and final result + * \param count number of elements to be reduced + * \param prepare_fun lambda function executed to prepare the data, if necessary + */ + inline void Allreduce(DType *sendrecvobj, + size_t max_nbyte, size_t count, + std::function prepare_fun); +#endif private: - // inner implementation of reducer - inline static void ReduceFunc(const void *src_, void *dst_, int len_, const MPI::Datatype &dtype); /*! \brief function handle to do reduce */ engine::ReduceHandle handle_; /*! 
\brief temporal buffer used to do reduce*/ From 77d74f6c0dd164d87c104926e3aca39de946cf13 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 20 Dec 2014 05:04:16 -0800 Subject: [PATCH 128/531] fix bug in lambda allreduce --- src/rabit-inl.h | 38 ++++++++++++++++++++++++++++++-------- src/rabit.h | 4 ++++ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/rabit-inl.h b/src/rabit-inl.h index 54e2c05d5..20ee39720 100644 --- a/src/rabit-inl.h +++ b/src/rabit-inl.h @@ -219,17 +219,39 @@ template inline SerializeReducer::SerializeReducer(void) { handle_.Init(SerializeReducerFunc_, sizeof(DType)); } +// closure to call Allreduce +template +struct SerializeReduceClosure { + DType *sendrecvobj; + size_t max_nbyte, count; + void (*prepare_fun)(void *arg); + void *prepare_arg; + std::string *p_buffer; + // invoke the closure + inline void Run(void) { + if (prepare_fun != NULL) prepare_fun(prepare_arg); + for (size_t i = 0; i < count; ++i) { + utils::MemoryFixSizeBuffer fs(BeginPtr(*p_buffer) + i * max_nbyte, max_nbyte); + sendrecvobj[i].Save(fs); + } + } + inline static void Invoke(void *c) { + static_cast*>(c)->Run(); + } +}; template inline void SerializeReducer::Allreduce(DType *sendrecvobj, size_t max_nbyte, size_t count, void (*prepare_fun)(void *arg), void *prepare_arg) { - buffer_.resize(max_nbyte); - for (size_t i = 0; i < count; ++i) { - utils::MemoryFixSizeBuffer fs(BeginPtr(buffer_) + i * max_nbyte, max_nbyte); - sendrecvobj[i].Save(fs); - } - handle_.Allreduce(BeginPtr(buffer_), max_nbyte, count, prepare_fun, prepare_arg); + buffer_.resize(max_nbyte * count); + // setup closure + SerializeReduceClosure c; + c.sendrecvobj = sendrecvobj; c.max_nbyte = max_nbyte; c.count = count; + c.prepare_fun = prepare_fun; c.prepare_arg = prepare_arg; c.p_buffer = &buffer_; + // invoke here + handle_.Allreduce(BeginPtr(buffer_), max_nbyte, count, + SerializeReduceClosure::Invoke, &c); for (size_t i = 0; i < count; ++i) { utils::MemoryFixSizeBuffer 
fs(BeginPtr(buffer_) + i * max_nbyte, max_nbyte); sendrecvobj[i].Load(fs); @@ -240,13 +262,13 @@ inline void SerializeReducer::Allreduce(DType *sendrecvobj, template inline void Reducer::Allreduce(DType *sendrecvbuf, size_t count, std::function prepare_fun) { - this->AllReduce(sendrecvbuf, count, InvokeLambda_, &prepare_fun); + this->Allreduce(sendrecvbuf, count, InvokeLambda_, &prepare_fun); } template inline void SerializeReducer::Allreduce(DType *sendrecvobj, size_t max_nbytes, size_t count, std::function prepare_fun) { - this->AllReduce(sendrecvobj, count, max_nbytes, InvokeLambda_, &prepare_fun); + this->Allreduce(sendrecvobj, max_nbytes, count, InvokeLambda_, &prepare_fun); } #endif } // namespace rabit diff --git a/src/rabit.h b/src/rabit.h index f5c94e1c9..834c21fd1 100644 --- a/src/rabit.h +++ b/src/rabit.h @@ -49,6 +49,10 @@ inline void Finalize(void); inline int GetRank(void); /*! \brief get total number of process */ inline int GetWorldSize(void); +/*! \brief whether rabit env is in distributed mode */ +inline bool IsDistributed(void) { + return GetWorldSize() != 1; +} /*! \brief get name of processor */ inline std::string GetProcessorName(void); /*! 
From 925d014271d42d0ac031f4dcf1019b648c23d0f0 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 20 Dec 2014 16:19:54 -0800 Subject: [PATCH 129/531] change file structure --- Makefile | 14 +++--- include/README.md | 7 +++ {src => include}/rabit.h | 10 ++--- {src => include/rabit}/engine.h | 3 +- {src => include/rabit}/io.h | 2 +- {src => include/rabit}/rabit-inl.h | 1 + {src => include/rabit}/timer.h | 1 + {src => include/rabit}/utils.h | 0 .../rabit_serializable.h | 6 +-- src/README.md | 6 +++ src/allreduce_base.h | 4 +- src/allreduce_robust.cc | 7 +-- src/allreduce_robust.h | 2 +- src/engine.cc | 2 +- src/engine_empty.cc | 3 +- src/engine_mpi.cc | 4 +- src/socket.h | 2 +- test/Makefile | 31 +++++-------- test/config.h | 2 +- test/speed_test.cpp | 4 +- test/test_local_recover.cpp | 2 +- test/test_model_recover.cpp | 2 +- toolkit/Makefile | 2 +- toolkit/kmeans.cpp | 43 ++++++++++--------- 24 files changed, 84 insertions(+), 76 deletions(-) create mode 100644 include/README.md rename {src => include}/rabit.h (98%) rename {src => include/rabit}/engine.h (99%) rename {src => include/rabit}/io.h (99%) rename {src => include/rabit}/rabit-inl.h (99%) rename {src => include/rabit}/timer.h (88%) rename {src => include/rabit}/utils.h (100%) rename src/serializable.h => include/rabit_serializable.h (96%) create mode 100644 src/README.md diff --git a/Makefile b/Makefile index a591600a8..2b614f3e1 100644 --- a/Makefile +++ b/Makefile @@ -2,23 +2,23 @@ export CC = gcc export CXX = g++ export MPICXX = mpicxx export LDFLAGS= -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -Iinclude BPATH=lib # objectives that makes up rabit library MPIOBJ= $(BPATH)/engine_mpi.o OBJ= $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o $(BPATH)/engine_empty.o ALIB= lib/librabit.a lib/librabit_mpi.a lib/librabit_empty.a - +HEADERS=src/*.h include/*.h include/rabit/*.h .PHONY: clean all all: $(ALIB) 
-$(BPATH)/allreduce_base.o: src/allreduce_base.cc src/*.h -$(BPATH)/engine.o: src/engine.cc src/*.h -$(BPATH)/allreduce_robust.o: src/allreduce_robust.cc src/*.h -$(BPATH)/engine_mpi.o: src/engine_mpi.cc src/*.h -$(BPATH)/engine_empty.o: src/engine_empty.cc src/*.h +$(BPATH)/allreduce_base.o: src/allreduce_base.cc $(HEADERS) +$(BPATH)/engine.o: src/engine.cc $(HEADERS) +$(BPATH)/allreduce_robust.o: src/allreduce_robust.cc $(HEADERS) +$(BPATH)/engine_mpi.o: src/engine_mpi.cc $(HEADERS) +$(BPATH)/engine_empty.o: src/engine_empty.cc $(HEADERS) lib/librabit.a: $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o lib/librabit_empty.a: $(BPATH)/engine_empty.o diff --git a/include/README.md b/include/README.md new file mode 100644 index 000000000..1481f2cdb --- /dev/null +++ b/include/README.md @@ -0,0 +1,7 @@ +Library Header Files of Rabit +==== +* This folder contains all the header needed to use rabit libary +* To use it, add include to the search path of the compiler +* User only need to know [rabit.h](rabit.h) and [rabit_serializable.h](rabit_serializable.h) to use the library +* Folder [rabit](rabit) contains headers for internal engine and implementation of template +* Not all .h files in the projects are contained in include, .h files that are internally used by library remains at [src](../src) diff --git a/src/rabit.h b/include/rabit.h similarity index 98% rename from src/rabit.h rename to include/rabit.h index 834c21fd1..e3f9edfb3 100644 --- a/src/rabit.h +++ b/include/rabit.h @@ -16,11 +16,11 @@ #include #endif // C++11 // contains definition of ISerializable -#include "./serializable.h" +#include "./rabit_serializable.h" // engine definition of rabit, defines internal implementation -// to use rabit interface, there is no need to read engine.h rabit.h and serializable.h -// is suffice to use the interface -#include "./engine.h" +// to use rabit interface, there is no need to read engine.h +// rabit.h and serializable.h are suffice to use the 
interface +#include "./rabit/engine.h" /*! \brief namespace of rabit */ namespace rabit { @@ -282,5 +282,5 @@ class SerializeReducer { }; } // namespace rabit // implementation of template functions -#include "./rabit-inl.h" +#include "./rabit/rabit-inl.h" #endif // RABIT_ALLREDUCE_H diff --git a/src/engine.h b/include/rabit/engine.h similarity index 99% rename from src/engine.h rename to include/rabit/engine.h index 03cd8e44a..ce2f85b66 100644 --- a/src/engine.h +++ b/include/rabit/engine.h @@ -5,7 +5,7 @@ */ #ifndef RABIT_ENGINE_H #define RABIT_ENGINE_H -#include "./serializable.h" +#include "../rabit_serializable.h" namespace MPI { /*! \brief MPI data type just to be compatible with MPI reduce function*/ @@ -222,3 +222,4 @@ class ReduceHandle { } // namespace engine } // namespace rabit #endif // RABIT_ENGINE_H + diff --git a/src/io.h b/include/rabit/io.h similarity index 99% rename from src/io.h rename to include/rabit/io.h index 699a93c83..44d0a0505 100644 --- a/src/io.h +++ b/include/rabit/io.h @@ -5,7 +5,7 @@ #include #include #include "./utils.h" -#include "./serializable.h" +#include "../rabit_serializable.h" /*! * \file io.h * \brief utilities that implements different serializable interface diff --git a/src/rabit-inl.h b/include/rabit/rabit-inl.h similarity index 99% rename from src/rabit-inl.h rename to include/rabit/rabit-inl.h index 20ee39720..55f60cf44 100644 --- a/src/rabit-inl.h +++ b/include/rabit/rabit-inl.h @@ -9,6 +9,7 @@ // use engine for implementation #include "./io.h" #include "./utils.h" +#include "../rabit.h" namespace rabit { namespace engine { diff --git a/src/timer.h b/include/rabit/timer.h similarity index 88% rename from src/timer.h rename to include/rabit/timer.h index c0c83c1c8..bf9f48383 100644 --- a/src/timer.h +++ b/include/rabit/timer.h @@ -10,6 +10,7 @@ namespace rabit { namespace utils { +// TODO not net cross platform, avoid to use this in most places /*! 
* \brief return time in seconds */ diff --git a/src/utils.h b/include/rabit/utils.h similarity index 100% rename from src/utils.h rename to include/rabit/utils.h diff --git a/src/serializable.h b/include/rabit_serializable.h similarity index 96% rename from src/serializable.h rename to include/rabit_serializable.h index a269dc1c7..eabc03f81 100644 --- a/src/serializable.h +++ b/include/rabit_serializable.h @@ -1,8 +1,8 @@ -#ifndef RABIT_SERIALIZABLE_H -#define RABIT_SERIALIZABLE_H +#ifndef RABIT_RABIT_SERIALIZABLE_H +#define RABIT_RABIT_SERIALIZABLE_H #include #include -#include "./utils.h" +#include "./rabit/utils.h" /*! * \file serializable.h * \brief defines serializable interface of rabit diff --git a/src/README.md b/src/README.md new file mode 100644 index 000000000..5e55d9210 --- /dev/null +++ b/src/README.md @@ -0,0 +1,6 @@ +Source Files of Rabit +==== +* This folder contains the source files of rabit library +* The library headers are in folder [include](../include) +* The .h files in this folder are internal header files that are only used by rabit and will not be seen by users + diff --git a/src/allreduce_base.h b/src/allreduce_base.h index 8bcc76781..f26b5ec45 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -13,9 +13,9 @@ #include #include -#include "./utils.h" +#include +#include #include "./socket.h" -#include "./engine.h" namespace MPI { // MPI data type to be compatible with existing MPI interface diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 7afd3546d..fe8013cb6 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -9,10 +9,11 @@ #define NOMINMAX #include #include -#include "./io.h" -#include "./utils.h" +#include +#include +#include +#include #include "./allreduce_robust.h" -#include "./rabit.h" namespace rabit { namespace engine { diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index fd85e4828..d178e391a 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -10,7 
+10,7 @@ #ifndef RABIT_ALLREDUCE_ROBUST_H #define RABIT_ALLREDUCE_ROBUST_H #include -#include "./engine.h" +#include #include "./allreduce_base.h" namespace rabit { diff --git a/src/engine.cc b/src/engine.cc index d6d6c92b6..efcb8616d 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -9,7 +9,7 @@ #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX -#include "./engine.h" +#include #include "./allreduce_base.h" #include "./allreduce_robust.h" diff --git a/src/engine_empty.cc b/src/engine_empty.cc index be37e3a7a..ff838717e 100644 --- a/src/engine_empty.cc +++ b/src/engine_empty.cc @@ -9,7 +9,8 @@ #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX -#include "./engine.h" +#include + namespace rabit { namespace engine { /*! \brief EmptyEngine */ diff --git a/src/engine_mpi.cc b/src/engine_mpi.cc index 46867d3cc..d8a30cbbc 100644 --- a/src/engine_mpi.cc +++ b/src/engine_mpi.cc @@ -9,8 +9,8 @@ #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX #include -#include "./engine.h" -#include "./utils.h" +#include +#include #include namespace rabit { diff --git a/src/socket.h b/src/socket.h index 65516690d..899ab03a7 100644 --- a/src/socket.h +++ b/src/socket.h @@ -21,7 +21,7 @@ #endif #include #include -#include "./utils.h" +#include #if defined(_WIN32) typedef int ssize_t; diff --git a/test/Makefile b/test/Makefile index 18d876b2e..3b5729b11 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,30 +1,22 @@ export CC = gcc export CXX = g++ export MPICXX = mpicxx -export LDFLAGS= -pthread -lm -lrt -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src -std=c++11 +export LDFLAGS= -pthread -lm -lrt -L../lib +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include -std=c++11 # specify tensor path BIN = speed_test test_model_recover test_local_recover -# objectives that makes up rabit library -RABIT_OBJ = allreduce_base.o allreduce_robust.o engine.o -MPIOBJ = engine_mpi.o - OBJ = $(RABIT_OBJ) speed_test.o test_model_recover.o test_local_recover.o MPIBIN = 
speed_test.mpi -.PHONY: clean all +.PHONY: clean all lib all: $(BIN) $(MPIBIN) -# the rabit library -allreduce_base.o: ../src/allreduce_base.cc ../src/*.h -engine.o: ../src/engine.cc ../src/*.h -engine_mpi.o: ../src/engine_mpi.cc -allreduce_robust.o: ../src/allreduce_robust.cc ../src/*.h - +lib: + cd ..;make;cd - # programs -speed_test.o: speed_test.cpp ../src/*.h -test_model_recover.o: test_model_recover.cpp ../src/*.h -test_local_recover.o: test_local_recover.cpp ../src/*.h +speed_test.o: speed_test.cpp ../include/*.h lib +test_model_recover.o: test_model_recover.cpp ../include/*.h lib +test_local_recover.o: test_local_recover.cpp ../include/*.h lib # we can link against MPI version to get use MPI speed_test: speed_test.o $(RABIT_OBJ) @@ -33,16 +25,13 @@ test_model_recover: test_model_recover.o $(RABIT_OBJ) test_local_recover: test_local_recover.o $(RABIT_OBJ) $(BIN) : - $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) + $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit $(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) $(MPIBIN) : - $(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) - -$(MPIOBJ) : - $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) + $(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mpi clean: $(RM) $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) *~ ../src/*~ diff --git a/test/config.h b/test/config.h index 146948adc..467e8f63e 100644 --- a/test/config.h +++ b/test/config.h @@ -10,7 +10,7 @@ #include #include #include -#include "./utils.h" +#include "./rabit/utils.h" namespace rabit { namespace utils { diff --git a/test/speed_test.cpp b/test/speed_test.cpp index e716731fd..68891bd31 100644 --- a/test/speed_test.cpp +++ b/test/speed_test.cpp @@ -1,7 +1,7 @@ // This program is used to test the speed of rabit API #include -#include -#include +#include +#include #include #include #include diff --git 
a/test/test_local_recover.cpp b/test/test_local_recover.cpp index d473345b3..e278a38ca 100644 --- a/test/test_local_recover.cpp +++ b/test/test_local_recover.cpp @@ -1,7 +1,7 @@ // this is a test case to test whether rabit can recover model when // facing an exception #include -#include +#include #include #include #include diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index 117acef09..f3693fa24 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -1,7 +1,7 @@ // this is a test case to test whether rabit can recover model when // facing an exception #include -#include +#include #include #include #include diff --git a/toolkit/Makefile b/toolkit/Makefile index 429264ad3..a194ccd08 100644 --- a/toolkit/Makefile +++ b/toolkit/Makefile @@ -2,7 +2,7 @@ export CC = gcc export CXX = g++ export MPICXX = mpicxx export LDFLAGS= -pthread -lm -L../lib -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src -std=c++11 +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include -std=c++11 # specify tensor path BIN = kmeans diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index bbd5067af..3a55a0427 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -1,7 +1,7 @@ // this is a test case to test whether rabit can recover model when // facing an exception #include -#include +#include #include "./toolkit_util.h" #include @@ -105,32 +105,33 @@ int main(int argc, char *argv[]) { model.InitModel(num_cluster, data.feat_dim); InitCentroids(data, &model.centroids); model.Normalize(); - utils::LogPrintf("[%d] start at %s\n", - rabit::GetRank(), rabit::GetProcessorName().c_str()); + rabit::TrackerPrintf("[%d] start at %s\n", + rabit::GetRank(), rabit::GetProcessorName().c_str()); } else { - utils::LogPrintf("[%d] restart iter=%d\n", rabit::GetRank(), iter); + rabit::TrackerPrintf("[%d] restart iter=%d\n", rabit::GetRank(), iter); } const unsigned num_feat = data.feat_dim; // matrix to store the result 
Matrix temp; - for (int r = iter; r < max_iter; ++r) { + for (int r = iter; r < max_iter; ++r) { temp.Init(num_cluster, num_feat + 1, 0.0f); - // call allreduce - rabit::Allreduce(&temp.data[0], temp.data.size(), [&]() { - // lambda function used to calculate the data if necessary - // this function may not be called when the result can be directly recovered - const size_t ndata = data.NumRow(); - for (size_t i = 0; i < ndata; ++i) { - SparseMat::Vector v = data[i]; - size_t k = GetCluster(model.centroids, v); - // temp[k] += v - for (size_t j = 0; j < v.length; ++j) { - temp[k][v[j].findex] += v[j].fvalue; - } - // use last column to record counts - temp[k][num_feat] += 1.0f; + auto lazy_get_centroid = [&]() { + // lambda function used to calculate the data if necessary + // this function may not be called when the result can be directly recovered + const size_t ndata = data.NumRow(); + for (size_t i = 0; i < ndata; ++i) { + SparseMat::Vector v = data[i]; + size_t k = GetCluster(model.centroids, v); + // temp[k] += v + for (size_t j = 0; j < v.length; ++j) { + temp[k][v[j].findex] += v[j].fvalue; } - }); + // use last column to record counts + temp[k][num_feat] += 1.0f; + } + }; + // call allreduce + rabit::Allreduce(&temp.data[0], temp.data.size(), lazy_get_centroid); // set number for (int k = 0; k < num_cluster; ++k) { float cnt = temp[k][num_feat]; @@ -146,7 +147,7 @@ int main(int argc, char *argv[]) { if (rabit::GetRank() == 0) { model.centroids.Print(argv[4]); } - utils::LogPrintf("[%d] Time taken: %f seconds\n", rabit::GetRank(), static_cast(clock() - tStart) / CLOCKS_PER_SEC); + rabit::TrackerPrintf("[%d] Time taken: %f seconds\n", rabit::GetRank(), static_cast(clock() - tStart) / CLOCKS_PER_SEC); rabit::Finalize(); return 0; } From ecf91ee081158d6019e22c1acc6ffb7db74aa642 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 20 Dec 2014 16:54:15 -0800 Subject: [PATCH 130/531] change usage --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) 
diff --git a/README.md b/README.md index 995f7b274..0517473a5 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementations of distributed machine learning programs, many of which fall naturally under the Allreduce abstraction. -* See the [package interface file](src/rabit.h) +* See the [package interface file](include/rabit.h) Features ==== @@ -17,6 +17,11 @@ Features - Code that uses the rabit interface also compiles with existing MPI compilers - Users can use MPI Allreduce with no code modification +Use Rabit +==== +* Type make in the root folder will compile the rabit library in lib folder +* Add lib to the library path and include to the include path of compiler + Design Notes ==== * Rabit is designed for algorithms that replicate the same global model across nodes, while each node operates on a local partition of the data. From 10bb407a2cdc85f17363e97f4b314cddb3891fed Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 20 Dec 2014 18:31:33 -0800 Subject: [PATCH 131/531] add mock engine --- Makefile | 6 ++- src/allreduce_mock.h | 94 ++++++++++++++++++++++++++++++++++++++++++++ src/engine.cc | 4 ++ src/engine_mock.cc | 15 +++++++ 4 files changed, 117 insertions(+), 2 deletions(-) create mode 100644 src/allreduce_mock.h create mode 100644 src/engine_mock.cc diff --git a/Makefile b/Makefile index 2b614f3e1..32c7070b3 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,8 @@ export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -Iinclude BPATH=lib # objectives that makes up rabit library MPIOBJ= $(BPATH)/engine_mpi.o -OBJ= $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o $(BPATH)/engine_empty.o -ALIB= lib/librabit.a lib/librabit_mpi.a lib/librabit_empty.a +OBJ= $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o $(BPATH)/engine_empty.o $(BPATH)/engine_mock.o +ALIB= lib/librabit.a 
lib/librabit_mpi.a lib/librabit_empty.a lib/librabit_mock.a HEADERS=src/*.h include/*.h include/rabit/*.h .PHONY: clean all @@ -19,8 +19,10 @@ $(BPATH)/engine.o: src/engine.cc $(HEADERS) $(BPATH)/allreduce_robust.o: src/allreduce_robust.cc $(HEADERS) $(BPATH)/engine_mpi.o: src/engine_mpi.cc $(HEADERS) $(BPATH)/engine_empty.o: src/engine_empty.cc $(HEADERS) +$(BPATH)/engine_mock.o: src/engine_mock.cc $(HEADERS) lib/librabit.a: $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o +lib/librabit_mock.a: $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine_mock.o lib/librabit_empty.a: $(BPATH)/engine_empty.o lib/librabit_mpi.a: $(MPIOBJ) diff --git a/src/allreduce_mock.h b/src/allreduce_mock.h new file mode 100644 index 000000000..36d760b70 --- /dev/null +++ b/src/allreduce_mock.h @@ -0,0 +1,94 @@ +/*! + * \file allreduce_mock.h + * \brief Mock test module of AllReduce engine, + * insert failures in certain call point, to test if the engine is robust to failure + * + * \author Ignacio Cano, Tianqi Chen + */ +#ifndef RABIT_ALLREDUCE_MOCK_H +#define RABIT_ALLREDUCE_MOCK_H +#include +#include +#include +#include "./allreduce_robust.h" + +namespace rabit { +namespace engine { +class AllreduceMock : public AllreduceRobust { + public: + // constructor + AllreduceMock(void) { + num_trial = 0; + } + // destructor + virtual ~AllreduceMock(void) {} + virtual void SetParam(const char *name, const char *val) { + AllreduceRobust::SetParam(name, val); + // additional parameters + if (!strcmp(name, "rabit_num_trial")) num_trial = atoi(val); + if (!strcmp(name, "mock")) { + MockKey k; + utils::Check(sscanf(val, "%d,%d,%d,%d", + &k.rank, &k.version, &k.seqno, &k.ntrial) == 4, + "invalid mock parameter"); + mock_map[k] = 1; + } + } + virtual void Allreduce(void *sendrecvbuf_, + size_t type_nbytes, + size_t count, + ReduceFunction reducer, + PreprocFunction prepare_fun, + void *prepare_arg) { + this->Verify(MockKey(rank, version_number, seq_counter, 
num_trial)); + AllreduceRobust::Allreduce(sendrecvbuf_, type_nbytes, + count, reducer, prepare_fun, prepare_arg); + } + virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { + this->Verify(MockKey(rank, version_number, seq_counter, num_trial)); + AllreduceRobust::Broadcast(sendrecvbuf_, total_size, root); + } + virtual void CheckPoint(const ISerializable *global_model, + const ISerializable *local_model) { + this->Verify(MockKey(rank, version_number, seq_counter, num_trial)); + AllreduceRobust::CheckPoint(global_model, local_model); + } + + private: + // key to identify the mock stage + struct MockKey { + int rank; + int version; + int seqno; + int ntrial; + MockKey(void) {} + MockKey(int rank, int version, int seqno, int ntrial) + : rank(rank), version(version), seqno(seqno), ntrial(ntrial) {} + inline bool operator==(const MockKey &b) const { + return rank == b.rank && + version == b.version && + seqno == b.seqno && + ntrial == b.ntrial; + } + inline bool operator<(const MockKey &b) const { + if (rank != b.rank) return rank < b.rank; + if (version != b.version) return version < b.version; + if (seqno != b.seqno) return seqno < b.seqno; + return ntrial < b.ntrial; + } + }; + // number of failure trials + int num_trial; + // record all mock actions + std::map mock_map; + // used to generate all kinds of exceptions + inline void Verify(const MockKey &key) { + if (mock_map.count(key) != 0) { + num_trial += 1; + utils::Error("[%d]@@@Hit Mock Error", rank); + } + } +}; +} // namespace engine +} // namespace rabit +#endif // RABIT_ALLREDUCE_MOCK_H diff --git a/src/engine.cc b/src/engine.cc index efcb8616d..57e074109 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -16,7 +16,11 @@ namespace rabit { namespace engine { // singleton sync manager +#ifndef RABIT_USE_MOCK AllreduceRobust manager; +#else +AllreduceMock manager; +#endif /*! 
\brief intiialize the synchronization module */ void Init(int argc, char *argv[]) { diff --git a/src/engine_mock.cc b/src/engine_mock.cc new file mode 100644 index 000000000..e8a77a6a2 --- /dev/null +++ b/src/engine_mock.cc @@ -0,0 +1,15 @@ +/*! + * \file engine_mock.cc + * \brief this is an engine implementation that will + * insert failures in certain call point, to test if the engine is robust to failure + * \author Tianqi Chen + */ +// define use MOCK, os we will use mock Manager +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE +#define NOMINMAX +// switch engine to AllreduceMock +#define RABIT_USE_MOCK +#include "./allreduce_mock.h" +#include "./engine.cc" + From e40047f9c207bedda38d5750bf8cfc36ba7a0ba4 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 20 Dec 2014 18:38:54 -0800 Subject: [PATCH 132/531] new mock test --- test/Makefile | 2 +- test/keepalive.sh | 4 ++-- test/test.mk | 3 ++- test/test_model_recover.cpp | 4 +++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/test/Makefile b/test/Makefile index 3b5729b11..55e80d988 100644 --- a/test/Makefile +++ b/test/Makefile @@ -25,7 +25,7 @@ test_model_recover: test_model_recover.o $(RABIT_OBJ) test_local_recover: test_local_recover.o $(RABIT_OBJ) $(BIN) : - $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit + $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mock $(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) diff --git a/test/keepalive.sh b/test/keepalive.sh index 854de0c33..c4df061a9 100755 --- a/test/keepalive.sh +++ b/test/keepalive.sh @@ -7,8 +7,8 @@ then fi nrep=0 echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK -until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do +until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep; do sleep 1 nrep=$((nrep+1)) - echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep + echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK 
rabit_num_trial=$nrep done diff --git a/test/test.mk b/test/test.mk index a7b6ceb51..947e64d42 100644 --- a/test/test.mk +++ b/test/test.mk @@ -17,4 +17,5 @@ local_recover_10_10k: # this experiment test recovery with actually process exit, use keepalive to keep program alive model_recover_10_10k: - ../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 + ../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 + diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index f3693fa24..86c0faa75 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -11,6 +11,7 @@ using namespace rabit; namespace rabit { namespace test { inline void CallBegin(const char *fun, int ntrial, int iter) { + return; int rank = rabit::GetRank(); if (!strcmp(fun, "Allreduce::Sum")) { if (ntrial == iter && rank == 0) exit(-1); @@ -20,6 +21,7 @@ inline void CallBegin(const char *fun, int ntrial, int iter) { } } inline void CallEnd(const char *fun, int ntrial, int iter) { + return; int rank = rabit::GetRank(); if (!strcmp(fun, "Allreduce::Bcast")) { if (ntrial == iter && rand() % 10 == rank) exit(-1); @@ -129,7 +131,7 @@ int main(int argc, char *argv[]) { int ntrial = 0; for (int i = 1; i < argc; ++i) { int n; - if (sscanf(argv[i], "repeat=%d", &n) == 1) ntrial = n; + if (sscanf(argv[i], "rabit_num_trial=%d", &n) == 1) ntrial = n; } while (true) { try { From cfea4dbe85fbceb7b0397d45573ef075aeaabc6e Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 21 Dec 2014 04:35:32 -0800 Subject: [PATCH 133/531] fix rabit for single node without initialization --- src/allreduce_base.cc | 4 +++- src/allreduce_base.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index b8b3ed0de..f2a104a90 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -21,7 +21,7 @@ AllreduceBase::AllreduceBase(void) { host_uri = ""; slave_port = 9010; nport_trial = 
1000; - rank = -1; + rank = 0; world_size = -1; hadoop_mode = 0; version_number = 0; @@ -50,6 +50,8 @@ void AllreduceBase::Init(void) { this->SetParam("rabit_world_size", num_task); } } + // clear the setting before start reconnection + this->rank = -1; //--------------------- // start socket utils::Socket::Startup(); diff --git a/src/allreduce_base.h b/src/allreduce_base.h index f26b5ec45..bede2c228 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -58,6 +58,7 @@ class AllreduceBase : public IEngine { } /*! \brief get rank */ virtual int GetWorldSize(void) const { + if (world_size == -1) return 1; return world_size; } /*! \brief get rank */ From a624051b85f877b3e05f460b2edc044a51c8b5c9 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 21 Dec 2014 17:55:08 -0800 Subject: [PATCH 134/531] add keepalive to socket, fix recover problem when a node is requester and pass data --- src/allreduce_base.cc | 5 +++-- src/allreduce_mock.h | 10 +++++----- src/allreduce_robust.cc | 17 ++++++++++++----- src/socket.h | 10 ++++++++++ toolkit/kmeans.cpp | 1 - 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index f2a104a90..6fb4a11cb 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -252,8 +252,9 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { tree_links.plinks.clear(); for (size_t i = 0; i < all_links.size(); ++i) { utils::Assert(!all_links[i].sock.BadSocket(), "ReConnectLink: bad socket"); - // set the socket to non-blocking mode - all_links[i].sock.SetNonBlock(true); + // set the socket to non-blocking mode, enable TCP keepalive + all_links[i].sock.SetNonBlock(true); + all_links[i].sock.SetKeepAlive(true); if (tree_neighbors.count(all_links[i].rank) != 0) { if (all_links[i].rank == parent_rank) { parent_index = static_cast(tree_links.plinks.size()); diff --git a/src/allreduce_mock.h b/src/allreduce_mock.h index 36d760b70..f46ee6885 100644 --- a/src/allreduce_mock.h +++ 
b/src/allreduce_mock.h @@ -40,17 +40,17 @@ class AllreduceMock : public AllreduceRobust { ReduceFunction reducer, PreprocFunction prepare_fun, void *prepare_arg) { - this->Verify(MockKey(rank, version_number, seq_counter, num_trial)); + this->Verify(MockKey(rank, version_number, seq_counter, num_trial), "AllReduce"); AllreduceRobust::Allreduce(sendrecvbuf_, type_nbytes, count, reducer, prepare_fun, prepare_arg); } virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root) { - this->Verify(MockKey(rank, version_number, seq_counter, num_trial)); + this->Verify(MockKey(rank, version_number, seq_counter, num_trial), "Broadcast"); AllreduceRobust::Broadcast(sendrecvbuf_, total_size, root); } virtual void CheckPoint(const ISerializable *global_model, const ISerializable *local_model) { - this->Verify(MockKey(rank, version_number, seq_counter, num_trial)); + this->Verify(MockKey(rank, version_number, seq_counter, num_trial), "CheckPoint"); AllreduceRobust::CheckPoint(global_model, local_model); } @@ -82,10 +82,10 @@ class AllreduceMock : public AllreduceRobust { // record all mock actions std::map mock_map; // used to generate all kinds of exceptions - inline void Verify(const MockKey &key) { + inline void Verify(const MockKey &key, const char *name) { if (mock_map.count(key) != 0) { num_trial += 1; - utils::Error("[%d]@@@Hit Mock Error", rank); + utils::Error("[%d]@@@Hit Mock Error:%s", rank, name); } } }; diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index fe8013cb6..fb53a0777 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -431,7 +431,7 @@ ShortestDist(const std::pair &node_value, if (dist_in[i].first + 1 < res) { res = dist_in[i].first + 1; size = dist_in[i].second; - } + } } // add one hop @@ -575,7 +575,7 @@ AllreduceRobust::TryRecoverData(RecoverType role, } if (req_in[i] && links[i].size_write != size) { if (role == kHaveData || - (role == kPassData && links[recv_link].size_read != links[i].size_write)) { + 
(links[recv_link].size_read != links[i].size_write)) { selecter.WatchWrite(links[i].sock); } finished = false; @@ -728,10 +728,17 @@ AllreduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool re } int recv_link; std::vector req_in; - ReturnType succ = TryDecideRouting(role, &size, &recv_link, &req_in); + // size of data + size_t data_size = size; + ReturnType succ = TryDecideRouting(role, &data_size, &recv_link, &req_in); if (succ != kSuccess) return succ; - utils::Check(size != 0, "zero size check point is not allowed"); - return TryRecoverData(role, sendrecvbuf, size, recv_link, req_in); + utils::Check(data_size != 0, "zero size check point is not allowed"); + if (role == kRequestData || role == kHaveData) { + utils::Check(data_size == size, + "Allreduce Recovered data size do not match the specification of function call\n"\ + "Please check if calling sequence of recovered program is the same the original one in current VersionNumber"); + } + return TryRecoverData(role, sendrecvbuf, data_size, recv_link, req_in); } /*! * \brief try to run recover execution for a request action described by flag and seqno, diff --git a/src/socket.h b/src/socket.h index 899ab03a7..29d62db35 100644 --- a/src/socket.h +++ b/src/socket.h @@ -219,6 +219,16 @@ class TCPSocket : public Socket{ } explicit TCPSocket(SOCKET sockfd) : Socket(sockfd) { } + /*! + * \brief enable/disable TCP keepalive + * \param keepalive whether to set the keep alive option on + */ + inline void SetKeepAlive(bool keepalive) { + int opt = static_cast(keepalive); + if (setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &opt, sizeof(opt)) < 0) { + Socket::Error("SetKeepAlive"); + } + } /*! 
* \brief create the socket, call this before using socket * \param af domain diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index 3a55a0427..11f191625 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -151,4 +151,3 @@ int main(int argc, char *argv[]) { rabit::Finalize(); return 0; } - From 12399a1d42d2a0ad62400b728b1cbfb0d93b4365 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 21 Dec 2014 17:59:12 -0800 Subject: [PATCH 135/531] add more mocktest --- test/test.mk | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/test.mk b/test/test.mk index 947e64d42..5f943103e 100644 --- a/test/test.mk +++ b/test/test.mk @@ -19,3 +19,8 @@ local_recover_10_10k: model_recover_10_10k: ../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 +model_recover_10_10k_die_same: + ../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 + +model_recover_10_10k_die_hard: + ../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0 From dcb6e22a9e8be55cc5246b2e129ea307088cfeea Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 00:20:13 -0800 Subject: [PATCH 136/531] add mapred tasks --- tracker/rabit_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 4a2cdb718..79e0cb133 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -35,7 +35,7 @@ if hadoop_streaming_jar != None: args.hadoop_streaming_jar = hadoop_streaming_jar def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer \"/bin/cat\" -file %s' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper) + cmd = 
'%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer \"/bin/cat\" -file %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper, nslaves) print cmd subprocess.check_call(cmd, shell = True) From 5fe3c58b4a1497f1d05459ba823e1db12d8c6e93 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 00:31:01 -0800 Subject: [PATCH 137/531] add kmeans hadoop --- toolkit/kmeans_hadoop.sh | 9 +++++++++ tracker/rabit_hadoop.py | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100755 toolkit/kmeans_hadoop.sh diff --git a/toolkit/kmeans_hadoop.sh b/toolkit/kmeans_hadoop.sh new file mode 100755 index 000000000..ed576f8b9 --- /dev/null +++ b/toolkit/kmeans_hadoop.sh @@ -0,0 +1,9 @@ +#!/bin/bash +if [ "$#" -lt 5 ]; +then + echo "Usage: " + exit -1 +fi +python ../rabit_hadoop.py -s $1 -i $2 -m kmeans --args "stdin "$3" "$4" stdout" -o $5 + + \ No newline at end of file diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 79e0cb133..c81e9d344 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -35,7 +35,8 @@ if hadoop_streaming_jar != None: args.hadoop_streaming_jar = hadoop_streaming_jar def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -input %s -output %s -mapper \"%s stdin %d %d stdout %s\" -reducer \"/bin/cat\" -file %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.nclusters, args.iterations, ' '.join(slave_args), args.mapper, nslaves) + cmd = '%s jar %s -input %s -output %s -mapper \"%s %s %s\" -reducer \"/bin/cat\" -file %s -D mapred.map.tasks=%d' + % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.args, ' '.join(slave_args), args.mapper, nslaves) print cmd subprocess.check_call(cmd, shell = True) From fd533d9a767d58b4ee190cc7a779448d673bfd8c Mon Sep 
17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 00:32:08 -0800 Subject: [PATCH 138/531] add kmeans --- toolkit/kmeans_hadoop.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/toolkit/kmeans_hadoop.sh b/toolkit/kmeans_hadoop.sh index ed576f8b9..bcd9adb3a 100755 --- a/toolkit/kmeans_hadoop.sh +++ b/toolkit/kmeans_hadoop.sh @@ -4,6 +4,6 @@ then echo "Usage: " exit -1 fi -python ../rabit_hadoop.py -s $1 -i $2 -m kmeans --args "stdin "$3" "$4" stdout" -o $5 - - \ No newline at end of file +#set path to hadoop streaming jar here +STREAMING_JAR= +python ../rabit_hadoop.py -hs $STREAMING_JAR -s $1 -i $2 -m kmeans --args "stdin "$3" "$4" stdout" -o $5 From 7a2ae105eaf93baa581af189f0bbe02f1b0991c7 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 01:03:12 -0800 Subject: [PATCH 139/531] fix script --- tracker/rabit_hadoop.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index c81e9d344..5c9f06948 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -35,8 +35,10 @@ if hadoop_streaming_jar != None: args.hadoop_streaming_jar = hadoop_streaming_jar def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -input %s -output %s -mapper \"%s %s %s\" -reducer \"/bin/cat\" -file %s -D mapred.map.tasks=%d' - % (args.hadoop_binary, args.hadoop_streaming_jar, args.input, args.output, args.mapper, args.args, ' '.join(slave_args), args.mapper, nslaves) + cmd = '%s jar %s' % (args.hadoop_binary, args.hadoop_streaming_jar) + cmd += ' -input %s -output %s' (args.input, args.output) + cmd += ' -mapper \"%s %s %s\" -reducer \"/bin/cat\" ' % (args.mapper, args.args, ' '.join(slave_args)) + cmd += ' -file %s -D mapred.map.tasks=%d' % (args.mapper, nslaves) print cmd subprocess.check_call(cmd, shell = True) From bb2ecc6ad5007b2ed5fc327e559c1bc65a9aea9d Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 01:10:14 -0800 Subject: [PATCH 140/531] remove 
c++11 --- toolkit/Makefile | 8 ++++---- toolkit/kmeans.cpp | 9 ++++++++- toolkit/kmeans_hadoop.sh | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/toolkit/Makefile b/toolkit/Makefile index a194ccd08..01a3cd83a 100644 --- a/toolkit/Makefile +++ b/toolkit/Makefile @@ -2,16 +2,16 @@ export CC = gcc export CXX = g++ export MPICXX = mpicxx export LDFLAGS= -pthread -lm -L../lib -export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include -std=c++11 +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include # specify tensor path -BIN = kmeans +BIN = kmeans.rabit # objectives that makes up rabit library OBJ = kmeans.o MPIBIN = kmeans.mpi .PHONY: clean all lib -all: $(BIN) $(MPIBIN) +all: $(BIN) lib: cd ..;make;cd - @@ -19,7 +19,7 @@ lib: kmeans.o: kmeans.cpp ../src/*.h # we can link against MPI version to get use MPI -kmeans: kmeans.o lib +kmeans.rabit: kmeans.o lib kmeans.mpi: kmeans.o lib $(BIN) : diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index 11f191625..e6be48fc0 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -115,7 +115,10 @@ int main(int argc, char *argv[]) { Matrix temp; for (int r = iter; r < max_iter; ++r) { temp.Init(num_cluster, num_feat + 1, 0.0f); - auto lazy_get_centroid = [&]() { +#if __cplusplus >= 201103L + auto lazy_get_centroid = [&]() +#endif + { // lambda function used to calculate the data if necessary // this function may not be called when the result can be directly recovered const size_t ndata = data.NumRow(); @@ -131,7 +134,11 @@ int main(int argc, char *argv[]) { } }; // call allreduce +#if __cplusplus >= 201103L rabit::Allreduce(&temp.data[0], temp.data.size(), lazy_get_centroid); +#else + rabit::Allreduce(&temp.data[0], temp.data.size()); +#endif // set number for (int k = 0; k < num_cluster; ++k) { float cnt = temp[k][num_feat]; diff --git a/toolkit/kmeans_hadoop.sh b/toolkit/kmeans_hadoop.sh index bcd9adb3a..d9a86bddd 100755 --- a/toolkit/kmeans_hadoop.sh +++ 
b/toolkit/kmeans_hadoop.sh @@ -6,4 +6,4 @@ then fi #set path to hadoop streaming jar here STREAMING_JAR= -python ../rabit_hadoop.py -hs $STREAMING_JAR -s $1 -i $2 -m kmeans --args "stdin "$3" "$4" stdout" -o $5 +python ../rabit_hadoop.py -hs $STREAMING_JAR -s $1 -i $2 -m kmeans.rabit --args "stdin "$3" "$4" stdout" -o $5 From dd8d9646c4bba6b1a023050d996a1f5624cd028d Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 01:25:06 -0800 Subject: [PATCH 141/531] rm mpi dep --- toolkit/Makefile | 10 ++++++---- toolkit/kmeans_hadoop.sh | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/toolkit/Makefile b/toolkit/Makefile index 01a3cd83a..b8035f62b 100644 --- a/toolkit/Makefile +++ b/toolkit/Makefile @@ -9,18 +9,20 @@ BIN = kmeans.rabit # objectives that makes up rabit library OBJ = kmeans.o MPIBIN = kmeans.mpi -.PHONY: clean all lib +.PHONY: clean all lib libmpi -all: $(BIN) +all: $(BIN) lib: - cd ..;make;cd - + cd ..;make lib/librabit.a;cd - +libmpi: + cd ..;make lib/librabit_mpi.a;cd - kmeans.o: kmeans.cpp ../src/*.h # we can link against MPI version to get use MPI kmeans.rabit: kmeans.o lib -kmeans.mpi: kmeans.o lib +kmeans.mpi: kmeans.o libmpi $(BIN) : $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit diff --git a/toolkit/kmeans_hadoop.sh b/toolkit/kmeans_hadoop.sh index d9a86bddd..23c3df688 100755 --- a/toolkit/kmeans_hadoop.sh +++ b/toolkit/kmeans_hadoop.sh @@ -6,4 +6,4 @@ then fi #set path to hadoop streaming jar here STREAMING_JAR= -python ../rabit_hadoop.py -hs $STREAMING_JAR -s $1 -i $2 -m kmeans.rabit --args "stdin "$3" "$4" stdout" -o $5 +python ../tracker/rabit_hadoop.py -hs $STREAMING_JAR -s $1 -i $2 -m kmeans.rabit --args "stdin "$3" "$4" stdout" -o $5 From 975bcc8261c66f42e913f4a3e4bf3768405dd060 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 01:26:59 -0800 Subject: [PATCH 142/531] fix --- tracker/rabit_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 5c9f06948..089e50335 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -36,7 +36,7 @@ if hadoop_streaming_jar != None: def hadoop_streaming(nslaves, slave_args): cmd = '%s jar %s' % (args.hadoop_binary, args.hadoop_streaming_jar) - cmd += ' -input %s -output %s' (args.input, args.output) + cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s %s %s\" -reducer \"/bin/cat\" ' % (args.mapper, args.args, ' '.join(slave_args)) cmd += ' -file %s -D mapred.map.tasks=%d' % (args.mapper, nslaves) print cmd From d3433c594655cff002d2cf379e0e657ebe2d5b3b Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 01:54:11 -0800 Subject: [PATCH 143/531] change script --- tracker/rabit_hadoop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 089e50335..7e5748aef 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -35,10 +35,10 @@ if hadoop_streaming_jar != None: args.hadoop_streaming_jar = hadoop_streaming_jar def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s' % (args.hadoop_binary, args.hadoop_streaming_jar) + cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nslaves) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s %s %s\" -reducer \"/bin/cat\" ' % (args.mapper, args.args, ' '.join(slave_args)) - cmd += ' -file %s -D mapred.map.tasks=%d' % (args.mapper, nslaves) + cmd += ' -file %s' % (args.mapper) print cmd subprocess.check_call(cmd, shell = True) From ab7492dbc27e05b8f2178644ce3f4b6f6e636767 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 03:24:00 -0800 Subject: [PATCH 144/531] add support for yarn --- src/allreduce_base.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 6fb4a11cb..30fcfa49f 100644 --- 
a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -43,6 +43,9 @@ void AllreduceBase::Init(void) { } // handling for hadoop const char *num_task = getenv("mapred_map_tasks"); + if (num_task == NULL) { + num_task = getenv("mapreduce_job_maps"); + } if (hadoop_mode != 0) { utils::Check(num_task != NULL, "hadoop_mode is set but cannot find mapred_map_tasks"); } From d82a6ed8116e71f437dd61319e26d070d03fcfaf Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 03:48:14 -0800 Subject: [PATCH 145/531] add file command --- toolkit/kmeans_hadoop.sh | 2 +- tracker/rabit_hadoop.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/toolkit/kmeans_hadoop.sh b/toolkit/kmeans_hadoop.sh index 23c3df688..9e7b3b832 100755 --- a/toolkit/kmeans_hadoop.sh +++ b/toolkit/kmeans_hadoop.sh @@ -6,4 +6,4 @@ then fi #set path to hadoop streaming jar here STREAMING_JAR= -python ../tracker/rabit_hadoop.py -hs $STREAMING_JAR -s $1 -i $2 -m kmeans.rabit --args "stdin "$3" "$4" stdout" -o $5 +python ../tracker/rabit_hadoop.py -hs $STREAMING_JAR -s $1 -i $2 -m kmeans.rabit --args "stdin "$3" "$4" stdout" -o $5 --file kmeans.rabit diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 7e5748aef..b86d91281 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -27,6 +27,7 @@ parser.add_argument('-i', '--input', required=True) parser.add_argument('-o', '--output', required=True) parser.add_argument('-m', '--mapper', required=True) parser.add_argument('-a', '--args', required=True) +parser.add_argument('-f', '--file', required=True) args = parser.parse_args() if hadoop_binary != None: @@ -38,7 +39,8 @@ def hadoop_streaming(nslaves, slave_args): cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nslaves) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s %s %s\" -reducer \"/bin/cat\" ' % (args.mapper, args.args, ' '.join(slave_args)) - cmd += ' -file %s' % (args.mapper) + 
for f in args.file.split('#'): + cmd += ' -file %s' % (f) print cmd subprocess.check_call(cmd, shell = True) From 6e6031cbe947f24d28d81ba111c7a6a202289079 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 03:59:01 -0800 Subject: [PATCH 146/531] add mock --- toolkit/Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/toolkit/Makefile b/toolkit/Makefile index b8035f62b..69819246a 100644 --- a/toolkit/Makefile +++ b/toolkit/Makefile @@ -5,16 +5,17 @@ export LDFLAGS= -pthread -lm -L../lib export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include # specify tensor path -BIN = kmeans.rabit +BIN = kmeans.rabit +MOCKBIN= kmeans.mock # objectives that makes up rabit library OBJ = kmeans.o MPIBIN = kmeans.mpi .PHONY: clean all lib libmpi -all: $(BIN) +all: $(BIN) $(MOCKBIN) lib: - cd ..;make lib/librabit.a;cd - + cd ..;make lib/librabit.a lib/librabit_mock.a; cd - libmpi: cd ..;make lib/librabit_mpi.a;cd - @@ -22,10 +23,13 @@ kmeans.o: kmeans.cpp ../src/*.h # we can link against MPI version to get use MPI kmeans.rabit: kmeans.o lib +kmeans.mock: kmeans.o lib kmeans.mpi: kmeans.o libmpi $(BIN) : $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit +$(MOCKBIN) : + $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mock $(OBJ) : $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) From 0dd51d5dd07a97f9cd5d0eacf7678035234d1b8a Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 04:12:38 -0800 Subject: [PATCH 147/531] add attempt id for hadoop --- src/allreduce_base.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 30fcfa49f..859d0b5f3 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -33,7 +33,7 @@ AllreduceBase::AllreduceBase(void) { void AllreduceBase::Init(void) { // setup from enviroment variables {// handling for hadoop - const char *task_id = 
getenv("mapred_task_id"); + const char *task_id = getenv("mapred_tip_id"); if (hadoop_mode != 0) { utils::Check(task_id != NULL, "hadoop_mode is set but cannot find mapred_task_id"); } @@ -41,6 +41,14 @@ void AllreduceBase::Init(void) { this->SetParam("rabit_task_id", task_id); this->SetParam("rabit_hadoop_mode", "1"); } + const char *attempt_id = getenv("mapred_task_id"); + if (attempt_id != 0) { + const char *att = strrchr(attempt_id, '_'); + int num_trial; + if (att != NULL && sscanf(att+1, "%d", &num_trial) == 1) { + this->SetParam("rabit_num_trial", att + 1); + } + } // handling for hadoop const char *num_task = getenv("mapred_map_tasks"); if (num_task == NULL) { From 15836eb98e3ab96d6467bcc4acdcd8271d7edd02 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 22 Dec 2014 04:17:23 -0800 Subject: [PATCH 148/531] add task id --- src/allreduce_base.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 859d0b5f3..418b0fd66 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -34,6 +34,9 @@ void AllreduceBase::Init(void) { // setup from enviroment variables {// handling for hadoop const char *task_id = getenv("mapred_tip_id"); + if (task_id == NULL) { + task_id = getenv("mapreduce_task_id"); + } if (hadoop_mode != 0) { utils::Check(task_id != NULL, "hadoop_mode is set but cannot find mapred_task_id"); } @@ -45,7 +48,7 @@ void AllreduceBase::Init(void) { if (attempt_id != 0) { const char *att = strrchr(attempt_id, '_'); int num_trial; - if (att != NULL && sscanf(att+1, "%d", &num_trial) == 1) { + if (att != NULL && sscanf(att + 1, "%d", &num_trial) == 1) { this->SetParam("rabit_num_trial", att + 1); } } From 27d6977a3eff0801a2c3983bb602e8727e156454 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 28 Dec 2014 05:12:07 -0800 Subject: [PATCH 149/531] cpplint pass --- Makefile | 2 +- include/rabit.h | 19 ++-- include/rabit/engine.h | 13 +-- include/rabit/io.h | 65 ++++-------- 
include/rabit/utils.h | 11 +- include/rabit_serializable.h | 13 +-- src/allreduce_base.cc | 189 +++++++++++++++++++++-------------- src/allreduce_base.h | 33 +++--- src/allreduce_robust-inl.h | 40 +++++--- src/allreduce_robust.cc | 125 +++++++++++++---------- src/allreduce_robust.h | 88 ++++++++-------- src/engine.cc | 8 +- src/engine_empty.cc | 13 ++- src/engine_mock.cc | 1 + src/engine_mpi.cc | 34 ++++--- src/socket.h | 80 ++++++++------- 16 files changed, 406 insertions(+), 328 deletions(-) diff --git a/Makefile b/Makefile index 32c7070b3..64cba30fa 100644 --- a/Makefile +++ b/Makefile @@ -36,4 +36,4 @@ $(ALIB): ar cr $@ $+ clean: - $(RM) $(OBJ) $(MPIOBJ) $(ALIB) $(MPIALIB) *~ src/*~ + $(RM) $(OBJ) $(MPIOBJ) $(ALIB) $(MPIALIB) *~ src/*~ include/*~ include/*/*~ diff --git a/include/rabit.h b/include/rabit.h index e3f9edfb3..5b2db3098 100644 --- a/include/rabit.h +++ b/include/rabit.h @@ -1,6 +1,5 @@ -#ifndef RABIT_RABIT_H -#define RABIT_RABIT_H /*! + * Copyright (c) 2014 by Contributors * \file rabit.h * \brief This file defines unified Allreduce/Broadcast interface of rabit * The actual implementation is redirected to rabit engine @@ -9,12 +8,14 @@ * rabit.h and serializable.h is all the user need to use rabit interface * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ +#ifndef RABIT_RABIT_H_ +#define RABIT_RABIT_H_ #include #include // optionally support of lambda function in C++11, if available #if __cplusplus >= 201103L #include -#endif // C++11 +#endif // C++11 // contains definition of ISerializable #include "./rabit_serializable.h" // engine definition of rabit, defines internal implementation @@ -116,7 +117,7 @@ inline void Broadcast(std::string *sendrecv_data, int root); */ template inline void Allreduce(DType *sendrecvbuf, size_t count, - void (*prepare_fun)(void *arg) = NULL, + void (*prepare_fun)(void *arg) = NULL, void *prepare_arg = NULL); // C++11 support for lambda prepare function @@ -142,9 +143,9 @@ inline void Allreduce(DType *sendrecvbuf, 
size_t count, * \tparam DType type of data */ template -inline void Allreduce(DType *sendrecvbuf, size_t count, std::function prepare_fun); -#endif // C++11 - +inline void Allreduce(DType *sendrecvbuf, size_t count, + std::function prepare_fun); +#endif // C++11 /*! * \brief load latest check point * \param global_model pointer to the globally shared model/state @@ -228,6 +229,7 @@ class Reducer { inline void Allreduce(DType *sendrecvbuf, size_t count, std::function prepare_fun); #endif + private: /*! \brief function handle to do reduce */ engine::ReduceHandle handle_; @@ -274,6 +276,7 @@ class SerializeReducer { size_t max_nbyte, size_t count, std::function prepare_fun); #endif + private: /*! \brief function handle to do reduce */ engine::ReduceHandle handle_; @@ -283,4 +286,4 @@ class SerializeReducer { } // namespace rabit // implementation of template functions #include "./rabit/rabit-inl.h" -#endif // RABIT_ALLREDUCE_H +#endif // RABIT_RABIT_H_ diff --git a/include/rabit/engine.h b/include/rabit/engine.h index ce2f85b66..ce8fb6ee5 100644 --- a/include/rabit/engine.h +++ b/include/rabit/engine.h @@ -1,10 +1,12 @@ /*! + * Copyright (c) 2014 by Contributors * \file engine.h * \brief This file defines the core interface of allreduce library * \author Tianqi Chen, Nacho, Tianyi */ -#ifndef RABIT_ENGINE_H -#define RABIT_ENGINE_H +#ifndef RABIT_ENGINE_H_ +#define RABIT_ENGINE_H_ +#include #include "../rabit_serializable.h" namespace MPI { @@ -122,7 +124,7 @@ class IEngine { virtual int GetRank(void) const = 0; /*! \brief get total number of */ virtual int GetWorldSize(void) const = 0; - /*! \brief get the host name of current node */ + /*! \brief get the host name of current node */ virtual std::string GetHost(void) const = 0; /*! * \brief print the msg in the tracker, @@ -211,7 +213,7 @@ class ReduceHandle { /*! 
\return the number of bytes occupied by the type */ static int TypeSize(const MPI::Datatype &dtype); - private: + protected: // handle data field void *handle_; // handle to the type field @@ -221,5 +223,4 @@ class ReduceHandle { }; } // namespace engine } // namespace rabit -#endif // RABIT_ENGINE_H - +#endif // RABIT_ENGINE_H_ diff --git a/include/rabit/io.h b/include/rabit/io.h index 44d0a0505..29fa7e812 100644 --- a/include/rabit/io.h +++ b/include/rabit/io.h @@ -1,16 +1,19 @@ -#ifndef RABIT_UTILS_IO_H -#define RABIT_UTILS_IO_H -#include -#include -#include -#include -#include "./utils.h" -#include "../rabit_serializable.h" /*! + * Copyright (c) 2014 by Contributors * \file io.h * \brief utilities that implements different serializable interface * \author Tianqi Chen */ +#ifndef RABIT_UTILS_IO_H_ +#define RABIT_UTILS_IO_H_ +#include +#include +#include +#include +#include +#include "./utils.h" +#include "../rabit_serializable.h" + namespace rabit { namespace utils { /*! \brief interface of i/o stream that support seek */ @@ -25,8 +28,9 @@ class ISeekStream: public IStream { /*! \brief fixed size memory buffer */ struct MemoryFixSizeBuffer : public ISeekStream { public: - MemoryFixSizeBuffer(void *p_buffer, size_t buffer_size) - : p_buffer_(reinterpret_cast(p_buffer)), buffer_size_(buffer_size) { + MemoryFixSizeBuffer(void *p_buffer, size_t buffer_size) + : p_buffer_(reinterpret_cast(p_buffer)), + buffer_size_(buffer_size) { curr_ptr_ = 0; } virtual ~MemoryFixSizeBuffer(void) {} @@ -40,7 +44,7 @@ struct MemoryFixSizeBuffer : public ISeekStream { } virtual void Write(const void *ptr, size_t size) { if (size == 0) return; - utils::Assert(curr_ptr_ + size <= buffer_size_, + utils::Assert(curr_ptr_ + size <= buffer_size_, "write position exceed fixed buffer size"); memcpy(p_buffer_ + curr_ptr_, ptr, size); curr_ptr_ += size; @@ -59,12 +63,12 @@ struct MemoryFixSizeBuffer : public ISeekStream { size_t buffer_size_; /*! 
\brief current pointer */ size_t curr_ptr_; -}; // class MemoryFixSizeBuffer +}; // class MemoryFixSizeBuffer /*! \brief a in memory buffer that can be read and write as stream interface */ struct MemoryBufferStream : public ISeekStream { public: - MemoryBufferStream(std::string *p_buffer) + explicit MemoryBufferStream(std::string *p_buffer) : p_buffer_(p_buffer) { curr_ptr_ = 0; } @@ -82,7 +86,7 @@ struct MemoryBufferStream : public ISeekStream { if (curr_ptr_ + size > p_buffer_->length()) { p_buffer_->resize(curr_ptr_+size); } - memcpy(&(*p_buffer_)[0] + curr_ptr_, ptr, size); + memcpy(&(*p_buffer_)[0] + curr_ptr_, ptr, size); curr_ptr_ += size; } virtual void Seek(size_t pos) { @@ -97,36 +101,7 @@ struct MemoryBufferStream : public ISeekStream { std::string *p_buffer_; /*! \brief current pointer */ size_t curr_ptr_; -}; // class MemoryBufferStream - -/*! \brief implementation of file i/o stream */ -class FileStream : public ISeekStream { - public: - explicit FileStream(FILE *fp) : fp(fp) {} - explicit FileStream(void) { - this->fp = NULL; - } - virtual size_t Read(void *ptr, size_t size) { - return std::fread(ptr, size, 1, fp); - } - virtual void Write(const void *ptr, size_t size) { - std::fwrite(ptr, size, 1, fp); - } - virtual void Seek(size_t pos) { - std::fseek(fp, static_cast(pos), SEEK_SET); - } - virtual size_t Tell(void) { - return std::ftell(fp); - } - inline void Close(void) { - if (fp != NULL){ - std::fclose(fp); fp = NULL; - } - } - - private: - FILE *fp; -}; +}; // class MemoryBufferStream } // namespace utils } // namespace rabit -#endif +#endif // RABIT_UTILS_IO_H_ diff --git a/include/rabit/utils.h b/include/rabit/utils.h index beae6589f..696000fac 100644 --- a/include/rabit/utils.h +++ b/include/rabit/utils.h @@ -1,10 +1,11 @@ -#ifndef RABIT_UTILS_H_ -#define RABIT_UTILS_H_ /*! 
+ * Copyright (c) 2014 by Contributors * \file utils.h * \brief simple utils to support the code * \author Tianqi Chen */ +#ifndef RABIT_UTILS_H_ +#define RABIT_UTILS_H_ #define _CRT_SECURE_NO_WARNINGS #include #include @@ -19,7 +20,7 @@ #define fopen64 std::fopen #endif #ifdef _MSC_VER -// NOTE: sprintf_s is not equivalent to snprintf, +// NOTE: sprintf_s is not equivalent to snprintf, // they are equivalent when success, which is sufficient for our case #define snprintf sprintf_s #define vsnprintf vsprintf_s @@ -30,7 +31,7 @@ #endif #endif -#ifdef __APPLE__ +#ifdef __APPLE__ #define off64_t off_t #define fopen64 std::fopen #endif @@ -186,5 +187,5 @@ inline const char* BeginPtr(const std::string &str) { if (str.length() == 0) return NULL; return &str[0]; } -} // namespace rabit +} // namespace rabit #endif // RABIT_UTILS_H_ diff --git a/include/rabit_serializable.h b/include/rabit_serializable.h index eabc03f81..0b2ccf3cb 100644 --- a/include/rabit_serializable.h +++ b/include/rabit_serializable.h @@ -1,13 +1,14 @@ -#ifndef RABIT_RABIT_SERIALIZABLE_H -#define RABIT_RABIT_SERIALIZABLE_H -#include -#include -#include "./rabit/utils.h" /*! + * Copyright (c) 2014 by Contributors * \file serializable.h * \brief defines serializable interface of rabit * \author Tianqi Chen */ +#ifndef RABIT_RABIT_SERIALIZABLE_H_ +#define RABIT_RABIT_SERIALIZABLE_H_ +#include +#include +#include "./rabit/utils.h" namespace rabit { /*! * \brief interface of stream I/O, used by ISerializable @@ -96,4 +97,4 @@ class ISerializable { virtual void Save(IStream &fo) const = 0; }; } // namespace rabit -#endif +#endif // RABIT_RABIT_SERIALIZABLE_H_ diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 418b0fd66..671c53877 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -1,4 +1,5 @@ /*! 
+ * Copyright (c) 2014 by Contributors * \file allreduce_base.cc * \brief Basic implementation of AllReduce * @@ -32,13 +33,15 @@ AllreduceBase::AllreduceBase(void) { // initialization function void AllreduceBase::Init(void) { // setup from enviroment variables - {// handling for hadoop + { + // handling for hadoop const char *task_id = getenv("mapred_tip_id"); if (task_id == NULL) { task_id = getenv("mapreduce_task_id"); } if (hadoop_mode != 0) { - utils::Check(task_id != NULL, "hadoop_mode is set but cannot find mapred_task_id"); + utils::Check(task_id != NULL, + "hadoop_mode is set but cannot find mapred_task_id"); } if (task_id != NULL) { this->SetParam("rabit_task_id", task_id); @@ -48,7 +51,7 @@ void AllreduceBase::Init(void) { if (attempt_id != 0) { const char *att = strrchr(attempt_id, '_'); int num_trial; - if (att != NULL && sscanf(att + 1, "%d", &num_trial) == 1) { + if (att != NULL && sscanf(att + 1, "%d", &num_trial) == 1) { this->SetParam("rabit_num_trial", att + 1); } } @@ -58,7 +61,8 @@ void AllreduceBase::Init(void) { num_task = getenv("mapreduce_job_maps"); } if (hadoop_mode != 0) { - utils::Check(num_task != NULL, "hadoop_mode is set but cannot find mapred_map_tasks"); + utils::Check(num_task != NULL, + "hadoop_mode is set but cannot find mapred_map_tasks"); } if (num_task != NULL) { this->SetParam("rabit_world_size", num_task); @@ -81,11 +85,11 @@ void AllreduceBase::Shutdown(void) { } all_links.clear(); tree_links.plinks.clear(); - + if (tracker_uri == "NULL") return; // notify tracker rank i have shutdown utils::TCPSocket tracker = this->ConnectTracker(); - tracker.SendStr(std::string("shutdown")); + tracker.SendStr(std::string("shutdown")); tracker.Close(); utils::TCPSocket::Finalize(); } @@ -107,11 +111,11 @@ void AllreduceBase::SetParam(const char *name, const char *val) { if (!strcmp(name, "rabit_tracker_uri")) tracker_uri = val; if (!strcmp(name, "rabit_tracker_port")) tracker_port = atoi(val); if (!strcmp(name, "rabit_task_id")) task_id 
= val; - if (!strcmp(name, "rabit_world_size")) world_size = atoi(val); + if (!strcmp(name, "rabit_world_size")) world_size = atoi(val); if (!strcmp(name, "rabit_hadoop_mode")) hadoop_mode = atoi(val); if (!strcmp(name, "rabit_reduce_buffer")) { char unit; - unsigned long amount; + uint64_t amount; if (sscanf(val, "%lu%c", &amount, &unit) == 2) { switch (unit) { case 'B': reduce_buffer_size = (amount + 7)/ 8; break; @@ -121,7 +125,8 @@ void AllreduceBase::SetParam(const char *name, const char *val) { default: utils::Error("invalid format for reduce buffer"); } } else { - utils::Error("invalid format for reduce_buffer, shhould be {integer}{unit}, unit can be {B, KB, MB, GB}"); + utils::Error("invalid format for reduce_buffer,"\ + "shhould be {integer}{unit}, unit can be {B, KB, MB, GB}"); } } } @@ -137,11 +142,16 @@ utils::TCPSocket AllreduceBase::ConnectTracker(void) const { if (!tracker.Connect(utils::SockAddr(tracker_uri.c_str(), tracker_port))) { utils::Socket::Error("Connect"); } - utils::Assert(tracker.SendAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 1"); - utils::Assert(tracker.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "ReConnectLink failure 2"); + using utils::Assert; + Assert(tracker.SendAll(&magic, sizeof(magic)) == sizeof(magic), + "ReConnectLink failure 1"); + Assert(tracker.RecvAll(&magic, sizeof(magic)) == sizeof(magic), + "ReConnectLink failure 2"); utils::Check(magic == kMagic, "sync::Invalid tracker message, init failure"); - utils::Assert(tracker.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 3"); - utils::Assert(tracker.SendAll(&world_size, sizeof(world_size)) == sizeof(world_size), "ReConnectLink failure 3"); + Assert(tracker.SendAll(&rank, sizeof(rank)) == sizeof(rank), + "ReConnectLink failure 3"); + Assert(tracker.SendAll(&world_size, sizeof(world_size)) == sizeof(world_size), + "ReConnectLink failure 3"); tracker.SendStr(task_id); return tracker; } @@ -161,29 +171,30 @@ void 
AllreduceBase::ReConnectLinks(const char *cmd) { int prev_rank, next_rank; // the rank of neighbors std::map tree_neighbors; - {// get new ranks - int newrank, num_neighbors; - utils::Assert(tracker.RecvAll(&newrank, sizeof(newrank)) == sizeof(newrank), - "ReConnectLink failure 4"); - utils::Assert(tracker.RecvAll(&parent_rank, sizeof(parent_rank)) == sizeof(parent_rank), - "ReConnectLink failure 4"); - utils::Assert(tracker.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), - "ReConnectLink failure 4"); - utils::Assert(rank == -1 || newrank == rank, "must keep rank to same if the node already have one"); - rank = newrank; - utils::Assert(tracker.RecvAll(&num_neighbors, sizeof(num_neighbors)) == sizeof(num_neighbors), - "ReConnectLink failure 4"); - for (int i = 0; i < num_neighbors; ++i) { - int nrank; - utils::Assert(tracker.RecvAll(&nrank, sizeof(nrank)) == sizeof(nrank), - "ReConnectLink failure 4"); - tree_neighbors[nrank] = 1; - } - utils::Assert(tracker.RecvAll(&prev_rank, sizeof(prev_rank)) == sizeof(prev_rank), - "ReConnectLink failure 4"); - utils::Assert(tracker.RecvAll(&next_rank, sizeof(next_rank)) == sizeof(next_rank), - "ReConnectLink failure 4"); + using utils::Assert; + // get new ranks + int newrank, num_neighbors; + Assert(tracker.RecvAll(&newrank, sizeof(newrank)) == sizeof(newrank), + "ReConnectLink failure 4"); + Assert(tracker.RecvAll(&parent_rank, sizeof(parent_rank)) ==\ + sizeof(parent_rank), "ReConnectLink failure 4"); + Assert(tracker.RecvAll(&world_size, sizeof(world_size)) == sizeof(world_size), + "ReConnectLink failure 4"); + Assert(rank == -1 || newrank == rank, + "must keep rank to same if the node already have one"); + rank = newrank; + Assert(tracker.RecvAll(&num_neighbors, sizeof(num_neighbors)) == \ + sizeof(num_neighbors), "ReConnectLink failure 4"); + for (int i = 0; i < num_neighbors; ++i) { + int nrank; + Assert(tracker.RecvAll(&nrank, sizeof(nrank)) == sizeof(nrank), + "ReConnectLink failure 4"); + 
tree_neighbors[nrank] = 1; } + Assert(tracker.RecvAll(&prev_rank, sizeof(prev_rank)) == sizeof(prev_rank), + "ReConnectLink failure 4"); + Assert(tracker.RecvAll(&next_rank, sizeof(next_rank)) == sizeof(next_rank), + "ReConnectLink failure 4"); // create listening socket utils::TCPSocket sock_listen; sock_listen.Create(); @@ -204,56 +215,67 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { } } int ngood = static_cast(good_link.size()); - utils::Assert(tracker.SendAll(&ngood, sizeof(ngood)) == sizeof(ngood), - "ReConnectLink failure 5"); + Assert(tracker.SendAll(&ngood, sizeof(ngood)) == sizeof(ngood), + "ReConnectLink failure 5"); for (size_t i = 0; i < good_link.size(); ++i) { - utils::Assert(tracker.SendAll(&good_link[i], sizeof(good_link[i])) == sizeof(good_link[i]), - "ReConnectLink failure 6"); + Assert(tracker.SendAll(&good_link[i], sizeof(good_link[i])) == \ + sizeof(good_link[i]), "ReConnectLink failure 6"); } - utils::Assert(tracker.RecvAll(&num_conn, sizeof(num_conn)) == sizeof(num_conn), - "ReConnectLink failure 7"); - utils::Assert(tracker.RecvAll(&num_accept, sizeof(num_accept)) == sizeof(num_accept), - "ReConnectLink failure 8"); + Assert(tracker.RecvAll(&num_conn, sizeof(num_conn)) == sizeof(num_conn), + "ReConnectLink failure 7"); + Assert(tracker.RecvAll(&num_accept, sizeof(num_accept)) == \ + sizeof(num_accept), "ReConnectLink failure 8"); num_error = 0; for (int i = 0; i < num_conn; ++i) { LinkRecord r; int hport, hrank; std::string hname; - tracker.RecvStr(&hname); - utils::Assert(tracker.RecvAll(&hport, sizeof(hport)) == sizeof(hport), "ReConnectLink failure 9"); - utils::Assert(tracker.RecvAll(&hrank, sizeof(hrank)) == sizeof(hrank), "ReConnectLink failure 10"); + tracker.RecvStr(&hname); + Assert(tracker.RecvAll(&hport, sizeof(hport)) == sizeof(hport), + "ReConnectLink failure 9"); + Assert(tracker.RecvAll(&hrank, sizeof(hrank)) == sizeof(hrank), + "ReConnectLink failure 10"); r.sock.Create(); if 
(!r.sock.Connect(utils::SockAddr(hname.c_str(), hport))) { num_error += 1; r.sock.Close(); continue; } - utils::Assert(r.sock.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 12"); - utils::Assert(r.sock.RecvAll(&r.rank, sizeof(r.rank)) == sizeof(r.rank), "ReConnectLink failure 13"); - utils::Check(hrank == r.rank, "ReConnectLink failure, link rank inconsistent"); + Assert(r.sock.SendAll(&rank, sizeof(rank)) == sizeof(rank), + "ReConnectLink failure 12"); + Assert(r.sock.RecvAll(&r.rank, sizeof(r.rank)) == sizeof(r.rank), + "ReConnectLink failure 13"); + utils::Check(hrank == r.rank, + "ReConnectLink failure, link rank inconsistent"); bool match = false; for (size_t i = 0; i < all_links.size(); ++i) { if (all_links[i].rank == hrank) { - utils::Assert(all_links[i].sock.IsClosed(), "Override a link that is active"); + Assert(all_links[i].sock.IsClosed(), + "Override a link that is active"); all_links[i].sock = r.sock; match = true; break; } } if (!match) all_links.push_back(r); } - utils::Assert(tracker.SendAll(&num_error, sizeof(num_error)) == sizeof(num_error), "ReConnectLink failure 14"); + Assert(tracker.SendAll(&num_error, sizeof(num_error)) == sizeof(num_error), + "ReConnectLink failure 14"); } while (num_error != 0); // send back socket listening port to tracker - utils::Assert(tracker.SendAll(&port, sizeof(port)) == sizeof(port), "ReConnectLink failure 14"); + Assert(tracker.SendAll(&port, sizeof(port)) == sizeof(port), + "ReConnectLink failure 14"); // close connection to tracker - tracker.Close(); + tracker.Close(); // listen to incoming links for (int i = 0; i < num_accept; ++i) { LinkRecord r; r.sock = sock_listen.Accept(); - utils::Assert(r.sock.SendAll(&rank, sizeof(rank)) == sizeof(rank), "ReConnectLink failure 15"); - utils::Assert(r.sock.RecvAll(&r.rank, sizeof(r.rank)) == sizeof(r.rank), "ReConnectLink failure 15"); + Assert(r.sock.SendAll(&rank, sizeof(rank)) == sizeof(rank), + "ReConnectLink failure 15"); + 
Assert(r.sock.RecvAll(&r.rank, sizeof(r.rank)) == sizeof(r.rank), + "ReConnectLink failure 15"); bool match = false; for (size_t i = 0; i < all_links.size(); ++i) { if (all_links[i].rank == r.rank) { - utils::Assert(all_links[i].sock.IsClosed(), "Override a link that is active"); + utils::Assert(all_links[i].sock.IsClosed(), + "Override a link that is active"); all_links[i].sock = r.sock; match = true; break; } } @@ -278,9 +300,12 @@ void AllreduceBase::ReConnectLinks(const char *cmd) { if (all_links[i].rank == prev_rank) ring_prev = &all_links[i]; if (all_links[i].rank == next_rank) ring_next = &all_links[i]; } - utils::Assert(parent_rank == -1 || parent_index != -1, "cannot find parent in the link"); - utils::Assert(prev_rank == -1 || ring_prev != NULL, "cannot find prev ring in the link"); - utils::Assert(next_rank == -1 || ring_next != NULL, "cannot find next ring in the link"); + Assert(parent_rank == -1 || parent_index != -1, + "cannot find parent in the link"); + Assert(prev_rank == -1 || ring_prev != NULL, + "cannot find prev ring in the link"); + Assert(next_rank == -1 || ring_next != NULL, + "cannot find next ring in the link"); } /*! 
* \brief perform in-place allreduce, on sendrecvbuf, this function can fail, and will return the cause of failure @@ -326,7 +351,7 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, // if no childs, no need to reduce if (nlink == static_cast(parent_index != -1)) { size_up_reduce = total_size; - } + } // while we have not passed the messages out while (true) { // select helper @@ -347,7 +372,7 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, if (links[i].size_read != total_size) { selecter.WatchRead(links[i].sock); } - // size_write <= size_read + // size_write <= size_read if (links[i].size_write != total_size) { selecter.WatchWrite(links[i].sock); // only watch for exception in live channels @@ -358,11 +383,11 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, } // finish runing allreduce if (finished) break; - // select must return + // select must return selecter.Select(); // exception handling for (int i = 0; i < nlink; ++i) { - // recive OOB message from some link + // recive OOB message from some link if (selecter.CheckExcept(links[i].sock)) return kGetExcept; } // read data from childs @@ -392,7 +417,8 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, // start position size_t start = size_up_reduce % buffer_size; // peform read till end of buffer - size_t nread = std::min(buffer_size - start, max_reduce - size_up_reduce); + size_t nread = std::min(buffer_size - start, + max_reduce - size_up_reduce); utils::Assert(nread % type_nbytes == 0, "Allreduce: size check"); for (int i = 0; i < nlink; ++i) { if (i != parent_index) { @@ -407,7 +433,7 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, } if (parent_index != -1) { // pass message up to parent, can pass data that are already been reduced - if (selecter.CheckWrite(links[parent_index].sock)) { + if (selecter.CheckWrite(links[parent_index].sock)) { ssize_t len = links[parent_index].sock. 
Send(sendrecvbuf + size_up_out, size_up_reduce - size_up_out); if (len != -1) { @@ -417,7 +443,8 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, } } // read data from parent - if (selecter.CheckRead(links[parent_index].sock) && total_size > size_down_in) { + if (selecter.CheckRead(links[parent_index].sock) && + total_size > size_down_in) { ssize_t len = links[parent_index].sock. Recv(sendrecvbuf + size_down_in, total_size - size_down_in); if (len == 0) { @@ -425,7 +452,8 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, } if (len != -1) { size_down_in += static_cast(len); - utils::Assert(size_down_in <= size_up_out, "Allreduce: boundary error"); + utils::Assert(size_down_in <= size_up_out, + "Allreduce: boundary error"); } else { if (errno != EAGAIN && errno != EWOULDBLOCK) return kSockError; } @@ -437,11 +465,13 @@ AllreduceBase::TryAllreduce(void *sendrecvbuf_, // can pass message down to childs for (int i = 0; i < nlink; ++i) { if (i != parent_index && selecter.CheckWrite(links[i].sock)) { - if (!links[i].WriteFromArray(sendrecvbuf, size_down_in)) return kSockError; + if (!links[i].WriteFromArray(sendrecvbuf, size_down_in)) { + return kSockError; + } } } } - return kSuccess; + return kSuccess; } /*! 
* \brief broadcast data from root to all nodes, this function can fail,and will return the cause of failure @@ -455,14 +485,15 @@ AllreduceBase::ReturnType AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { RefLinkVector &links = tree_links; if (links.size() == 0 || total_size == 0) return kSuccess; - utils::Check(root < world_size, "Broadcast: root should be smaller than world size"); + utils::Check(root < world_size, + "Broadcast: root should be smaller than world size"); // number of links const int nlink = static_cast(links.size()); // size of space already read from data size_t size_in = 0; // input link, -2 means unknown yet, -1 means this is root int in_link = -2; - + // initialize the link statistics for (int i = 0; i < nlink; ++i) { links[i].ResetSize(); @@ -471,9 +502,9 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { if (this->rank == root) { size_in = total_size; in_link = -1; - } + } // while we have not passed the messages out - while(true) { + while (true) { bool finished = true; // select helper utils::SelectHelper selecter; @@ -487,7 +518,7 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { if (in_link != -2 && i != in_link && links[i].size_write != total_size) { selecter.WatchWrite(links[i].sock); finished = false; } - selecter.WatchException(links[i].sock); + selecter.WatchException(links[i].sock); } // finish running if (finished) break; @@ -495,14 +526,16 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { selecter.Select(); // exception handling for (int i = 0; i < nlink; ++i) { - // recive OOB message from some link + // recive OOB message from some link if (selecter.CheckExcept(links[i].sock)) return kGetExcept; } if (in_link == -2) { // probe in-link for (int i = 0; i < nlink; ++i) { if (selecter.CheckRead(links[i].sock)) { - if (!links[i].ReadToArray(sendrecvbuf_, total_size)) return kSockError; + if 
(!links[i].ReadToArray(sendrecvbuf_, total_size)) { + return kSockError; + } size_in = links[i].size_read; if (size_in != 0) { in_link = i; break; @@ -512,7 +545,9 @@ AllreduceBase::TryBroadcast(void *sendrecvbuf_, size_t total_size, int root) { } else { // read from in link if (in_link >= 0 && selecter.CheckRead(links[in_link].sock)) { - if(!links[in_link].ReadToArray(sendrecvbuf_, total_size)) return kSockError; + if (!links[in_link].ReadToArray(sendrecvbuf_, total_size)) { + return kSockError; + } size_in = links[in_link].size_read; } } diff --git a/src/allreduce_base.h b/src/allreduce_base.h index bede2c228..14a8cf339 100644 --- a/src/allreduce_base.h +++ b/src/allreduce_base.h @@ -1,4 +1,5 @@ /*! + * Copyright (c) 2014 by Contributors * \file allreduce_base.h * \brief Basic implementation of AllReduce * using TCP non-block socket and tree-shape reduction. @@ -8,13 +9,14 @@ * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ -#ifndef RABIT_ALLREDUCE_BASE_H -#define RABIT_ALLREDUCE_BASE_H +#ifndef RABIT_ALLREDUCE_BASE_H_ +#define RABIT_ALLREDUCE_BASE_H_ #include #include -#include -#include +#include +#include "rabit/utils.h" +#include "rabit/engine.h" #include "./socket.h" namespace MPI { @@ -22,7 +24,7 @@ namespace MPI { class Datatype { public: size_t type_size; - Datatype(size_t type_size) : type_size(type_size) {} + explicit Datatype(size_t type_size) : type_size(type_size) {} }; } namespace rabit { @@ -31,7 +33,7 @@ namespace engine { class AllreduceBase : public IEngine { public: // magic number to verify server - const static int kMagic = 0xff99; + static const int kMagic = 0xff99; // constant one byte out of band message to indicate error happening AllreduceBase(void); virtual ~AllreduceBase(void) {} @@ -79,12 +81,13 @@ class AllreduceBase : public IEngine { */ virtual void Allreduce(void *sendrecvbuf_, size_t type_nbytes, - size_t count, + size_t count, ReduceFunction reducer, PreprocFunction prepare_fun = NULL, void *prepare_arg = NULL) { if 
(prepare_fun != NULL) prepare_fun(prepare_arg); - utils::Assert(TryAllreduce(sendrecvbuf_, type_nbytes, count, reducer) == kSuccess, + utils::Assert(TryAllreduce(sendrecvbuf_, + type_nbytes, count, reducer) == kSuccess, "Allreduce failed"); } /*! @@ -201,12 +204,16 @@ class AllreduceBase : public IEngine { // constructor LinkRecord(void) {} // initialize buffer - inline void InitBuffer(size_t type_nbytes, size_t count, size_t reduce_buffer_size) { + inline void InitBuffer(size_t type_nbytes, size_t count, + size_t reduce_buffer_size) { size_t n = (type_nbytes * count + 7)/ 8; buffer_.resize(std::min(reduce_buffer_size, n)); // make sure align to type_nbytes - buffer_size = buffer_.size() * sizeof(uint64_t) / type_nbytes * type_nbytes; - utils::Assert(type_nbytes <= buffer_size, "too large type_nbytes=%lu, buffer_size=%lu", type_nbytes, buffer_size); + buffer_size = + buffer_.size() * sizeof(uint64_t) / type_nbytes * type_nbytes; + utils::Assert(type_nbytes <= buffer_size, + "too large type_nbytes=%lu, buffer_size=%lu", + type_nbytes, buffer_size); // set buffer head buffer_head = reinterpret_cast(BeginPtr(buffer_)); } @@ -225,7 +232,7 @@ class AllreduceBase : public IEngine { size_t ngap = size_read - protect_start; utils::Assert(ngap <= buffer_size, "Allreduce: boundary check"); size_t offset = size_read % buffer_size; - size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); + size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); if (nmax == 0) return true; ssize_t len = sock.Recv(buffer_head + offset, nmax); // length equals 0, remote disconnected @@ -235,7 +242,7 @@ class AllreduceBase : public IEngine { if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK; size_read += static_cast(len); return true; - } + } /*! 
* \brief read data into array, * this function can not be used together with ReadToRingBuffer diff --git a/src/allreduce_robust-inl.h b/src/allreduce_robust-inl.h index 49f8f2c37..e0250e426 100644 --- a/src/allreduce_robust-inl.h +++ b/src/allreduce_robust-inl.h @@ -1,11 +1,13 @@ /*! + * Copyright (c) 2014 by Contributors * \file allreduce_robust-inl.h * \brief implementation of inline template function in AllreduceRobust * * \author Tianqi Chen */ -#ifndef RABIT_ENGINE_ROBUST_INL_H -#define RABIT_ENGINE_ROBUST_INL_H +#ifndef RABIT_ENGINE_ROBUST_INL_H_ +#define RABIT_ENGINE_ROBUST_INL_H_ +#include namespace rabit { namespace engine { @@ -33,14 +35,14 @@ inline AllreduceRobust::ReturnType AllreduceRobust::MsgPassing(const NodeType &node_value, std::vector *p_edge_in, std::vector *p_edge_out, - EdgeType (*func) (const NodeType &node_value, - const std::vector &edge_in, - size_t out_index) - ) { + EdgeType (*func) + (const NodeType &node_value, + const std::vector &edge_in, + size_t out_index)) { RefLinkVector &links = tree_links; if (links.size() == 0) return kSuccess; // number of links - const int nlink = static_cast(links.size()); + const int nlink = static_cast(links.size()); // initialize the pointers for (int i = 0; i < nlink; ++i) { links[i].ResetSize(); @@ -58,7 +60,7 @@ AllreduceRobust::MsgPassing(const NodeType &node_value, // if no childs, no need to, directly start passing message if (nlink == static_cast(parent_index != -1)) { utils::Assert(parent_index == 0, "parent must be 0"); - edge_out[parent_index] = func(node_value, edge_in, parent_index); + edge_out[parent_index] = func(node_value, edge_in, parent_index); stage = 1; } // while we have not passed the messages out @@ -94,7 +96,7 @@ AllreduceRobust::MsgPassing(const NodeType &node_value, selecter.Select(); // exception handling for (int i = 0; i < nlink; ++i) { - // recive OOB message from some link + // recive OOB message from some link if (selecter.CheckExcept(links[i].sock)) return kGetExcept; } 
if (stage == 0) { @@ -103,7 +105,9 @@ AllreduceRobust::MsgPassing(const NodeType &node_value, for (int i = 0; i < nlink; ++i) { if (i != parent_index) { if (selecter.CheckRead(links[i].sock)) { - if (!links[i].ReadToArray(&edge_in[i], sizeof(EdgeType))) return kSockError; + if (!links[i].ReadToArray(&edge_in[i], sizeof(EdgeType))) { + return kSockError; + } } if (links[i].size_read != sizeof(EdgeType)) finished = false; } @@ -124,13 +128,17 @@ AllreduceRobust::MsgPassing(const NodeType &node_value, if (stage == 1) { const int pid = this->parent_index; utils::Assert(pid != -1, "MsgPassing invalid stage"); - if (!links[pid].WriteFromArray(&edge_out[pid], sizeof(EdgeType))) return kSockError; + if (!links[pid].WriteFromArray(&edge_out[pid], sizeof(EdgeType))) { + return kSockError; + } if (links[pid].size_write == sizeof(EdgeType)) stage = 2; } if (stage == 2) { const int pid = this->parent_index; - utils::Assert(pid != -1, "MsgPassing invalid stage"); - if (!links[pid].ReadToArray(&edge_in[pid], sizeof(EdgeType))) return kSockError; + utils::Assert(pid != -1, "MsgPassing invalid stage"); + if (!links[pid].ReadToArray(&edge_in[pid], sizeof(EdgeType))) { + return kSockError; + } if (links[pid].size_read == sizeof(EdgeType)) { for (int i = 0; i < nlink; ++i) { if (i != pid) edge_out[i] = func(node_value, edge_in, i); @@ -141,7 +149,9 @@ AllreduceRobust::MsgPassing(const NodeType &node_value, if (stage == 3) { for (int i = 0; i < nlink; ++i) { if (i != parent_index && links[i].size_write != sizeof(EdgeType)) { - if (!links[i].WriteFromArray(&edge_out[i], sizeof(EdgeType))) return kSockError; + if (!links[i].WriteFromArray(&edge_out[i], sizeof(EdgeType))) { + return kSockError; + } } } } @@ -150,4 +160,4 @@ AllreduceRobust::MsgPassing(const NodeType &node_value, } } // namespace engine } // namespace rabit -#endif // RABIT_ENGINE_ROBUST_INL_H +#endif // RABIT_ENGINE_ROBUST_INL_H_ diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index fb53a0777..e25a9c85f 
100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -1,4 +1,5 @@ /*! + * Copyright (c) 2014 by Contributors * \file allreduce_robust.cc * \brief Robust implementation of Allreduce * @@ -9,10 +10,10 @@ #define NOMINMAX #include #include -#include -#include -#include -#include +#include "rabit/io.h" +#include "rabit/utils.h" +#include "rabit/engine.h" +#include "rabit/rabit-inl.h" #include "./allreduce_robust.h" namespace rabit { @@ -30,10 +31,10 @@ void AllreduceRobust::Shutdown(void) { utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckPoint, ActionSummary::kSpecialOp), "Shutdown: check point must return true"); // reset result buffer - resbuf.Clear(); seq_counter = 0; + resbuf.Clear(); seq_counter = 0; // execute check ack step, load happens here utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp), - "Shutdown: check ack must return true"); + "Shutdown: check ack must return true"); AllreduceBase::Shutdown(); } /*! @@ -89,7 +90,7 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_, } else { recovered = RecoverExec(sendrecvbuf_, type_nbytes * count, 0, seq_counter); } - } + } } resbuf.PushTemp(seq_counter, type_nbytes, count); seq_counter += 1; @@ -102,7 +103,7 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_, */ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) { // skip action in single node - if (world_size == 1) return; + if (world_size == 1) return; bool recovered = RecoverExec(sendrecvbuf_, total_size, 0, seq_counter); // now we are free to remove the last result, if any if (resbuf.LastSeqNo() != -1 && @@ -119,7 +120,7 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root) } else { recovered = RecoverExec(sendrecvbuf_, total_size, 0, seq_counter); } - } + } } resbuf.PushTemp(seq_counter, 1, total_size); seq_counter += 1; @@ -151,7 +152,8 @@ int AllreduceRobust::LoadCheckPoint(ISerializable *global_model, // skip action in single node 
if (world_size == 1) return 0; if (num_local_replica == 0) { - utils::Check(local_model == NULL, "need to set num_local_replica larger than 1 to checkpoint local_model"); + utils::Check(local_model == NULL, + "need to set num_local_replica larger than 1 to checkpoint local_model"); } // check if we succesful if (RecoverExec(NULL, 0, ActionSummary::kLoadCheck, ActionSummary::kSpecialOp)) { @@ -171,9 +173,10 @@ int AllreduceRobust::LoadCheckPoint(ISerializable *global_model, // load from buffer utils::MemoryBufferStream fs(&global_checkpoint); if (global_checkpoint.length() == 0) { - version_number = 0; + version_number = 0; } else { - utils::Assert(fs.Read(&version_number, sizeof(version_number)) != 0, "read in version number"); + utils::Assert(fs.Read(&version_number, sizeof(version_number)) != 0, + "read in version number"); global_model->Load(fs); utils::Assert(local_model == NULL || nlocal == num_local_replica + 1, "local model inconsistent, nlocal=%d", nlocal); @@ -212,9 +215,10 @@ void AllreduceRobust::CheckPoint(const ISerializable *global_model, version_number += 1; return; } if (num_local_replica == 0) { - utils::Check(local_model == NULL, "need to set num_local_replica larger than 1 to checkpoint local_model"); - } - if (num_local_replica != 0) { + utils::Check(local_model == NULL, + "need to set num_local_replica larger than 1 to checkpoint local_model"); + } + if (num_local_replica != 0) { while (true) { if (RecoverExec(NULL, 0, 0, ActionSummary::kLocalCheckPoint)) break; // save model model to new version place @@ -247,10 +251,10 @@ void AllreduceRobust::CheckPoint(const ISerializable *global_model, fs.Write(&version_number, sizeof(version_number)); global_model->Save(fs); // reset result buffer - resbuf.Clear(); seq_counter = 0; + resbuf.Clear(); seq_counter = 0; // execute check ack step, load happens here utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp), - "check ack must return true"); + "check ack must return 
true"); } /*! * \brief reset the all the existing links by sending Out-of-Band message marker @@ -383,7 +387,8 @@ AllreduceRobust::ReturnType AllreduceRobust::TryResetLinks(void) { */ bool AllreduceRobust::CheckAndRecover(ReturnType err_type) { if (err_type == kSuccess) return true; - {// simple way, shutdown all links + { + // simple way, shutdown all links for (size_t i = 0; i < all_links.size(); ++i) { if (!all_links[i].sock.BadSocket()) all_links[i].sock.Close(); } @@ -392,8 +397,8 @@ bool AllreduceRobust::CheckAndRecover(ReturnType err_type) { } // this was old way // TryResetLinks still causes possible errors, so not use this one - while(err_type != kSuccess) { - switch(err_type) { + while (err_type != kSuccess) { + switch (err_type) { case kGetExcept: err_type = TryResetLinks(); break; case kSockError: { TryResetLinks(); @@ -416,7 +421,7 @@ bool AllreduceRobust::CheckAndRecover(ReturnType err_type) { * \param out_index the edge index of output link * \return the shorest distance result of out edge specified by out_index */ -inline std::pair +inline std::pair ShortestDist(const std::pair &node_value, const std::vector< std::pair > &dist_in, size_t out_index) { @@ -484,8 +489,9 @@ AllreduceRobust::TryDecideRouting(AllreduceRobust::RecoverType role, int *p_recvlink, std::vector *p_req_in) { int best_link = -2; - {// get the shortest distance to the request point - std::vector< std::pair > dist_in, dist_out; + { + // get the shortest distance to the request point + std::vector > dist_in, dist_out; ReturnType succ = MsgPassing(std::make_pair(role == kHaveData, *p_size), &dist_in, &dist_out, ShortestDist); if (succ != kSuccess) return succ; @@ -512,7 +518,7 @@ AllreduceRobust::TryDecideRouting(AllreduceRobust::RecoverType role, &req_in, &req_out, DataRequest); if (succ != kSuccess) return succ; // set p_req_in - p_req_in->resize(req_in.size()); + p_req_in->resize(req_in.size()); for (size_t i = 0; i < req_in.size(); ++i) { // set p_req_in (*p_req_in)[i] = 
(req_in[i] != 0); @@ -591,19 +597,23 @@ AllreduceRobust::TryRecoverData(RecoverType role, if (role == kRequestData) { const int pid = recv_link; if (selecter.CheckRead(links[pid].sock)) { - if(!links[pid].ReadToArray(sendrecvbuf_, size)) return kSockError; + if (!links[pid].ReadToArray(sendrecvbuf_, size)) return kSockError; } for (int i = 0; i < nlink; ++i) { if (req_in[i] && links[i].size_write != links[pid].size_read && selecter.CheckWrite(links[i].sock)) { - if(!links[i].WriteFromArray(sendrecvbuf_, links[pid].size_read)) return kSockError; + if (!links[i].WriteFromArray(sendrecvbuf_, links[pid].size_read)) { + return kSockError; + } } } } if (role == kHaveData) { for (int i = 0; i < nlink; ++i) { if (req_in[i] && selecter.CheckWrite(links[i].sock)) { - if(!links[i].WriteFromArray(sendrecvbuf_, size)) return kSockError; + if (!links[i].WriteFromArray(sendrecvbuf_, size)) { + return kSockError; + } } } } @@ -616,13 +626,14 @@ AllreduceRobust::TryRecoverData(RecoverType role, if (req_in[i]) min_write = std::min(links[i].size_write, min_write); } utils::Assert(min_write <= links[pid].size_read, "boundary check"); - if (!links[pid].ReadToRingBuffer(min_write)) return kSockError; + if (!links[pid].ReadToRingBuffer(min_write)) return kSockError; } - for (int i = 0; i < nlink; ++i) { - if (req_in[i] && selecter.CheckWrite(links[i].sock) && links[pid].size_read != links[i].size_write) { + for (int i = 0; i < nlink; ++i) { + if (req_in[i] && selecter.CheckWrite(links[i].sock) && + links[pid].size_read != links[i].size_write) { size_t start = links[i].size_write % buffer_size; // send out data from ring buffer - size_t nwrite = std::min(buffer_size - start, links[pid].size_read - links[i].size_write); + size_t nwrite = std::min(buffer_size - start, links[pid].size_read - links[i].size_write); ssize_t len = links[i].sock.Send(links[pid].buffer_head + start, nwrite); if (len != -1) { links[i].size_write += len; @@ -648,15 +659,15 @@ 
AllreduceRobust::TryRecoverData(RecoverType role, */ AllreduceRobust::ReturnType AllreduceRobust::TryLoadCheckPoint(bool requester) { // check in local data - RecoverType role = requester ? kRequestData : kHaveData; - ReturnType succ; + RecoverType role = requester ? kRequestData : kHaveData; + ReturnType succ; if (num_local_replica != 0) { if (requester) { // clear existing history, if any, before load local_rptr[local_chkpt_version].clear(); - local_chkpt[local_chkpt_version].clear(); + local_chkpt[local_chkpt_version].clear(); } - // recover local checkpoint + // recover local checkpoint succ = TryRecoverLocalState(&local_rptr[local_chkpt_version], &local_chkpt[local_chkpt_version]); if (succ != kSuccess) return succ; @@ -716,7 +727,7 @@ AllreduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool re // if we goes to this place, use must have already setup the state once utils::Assert(nlocal == 1 || nlocal == num_local_replica + 1, "TryGetResult::Checkpoint"); - return TryRecoverLocalState(&local_rptr[new_version], &local_chkpt[new_version]); + return TryRecoverLocalState(&local_rptr[new_version], &local_chkpt[new_version]); } // handles normal data recovery RecoverType role; @@ -735,8 +746,9 @@ AllreduceRobust::TryGetResult(void *sendrecvbuf, size_t size, int seqno, bool re utils::Check(data_size != 0, "zero size check point is not allowed"); if (role == kRequestData || role == kHaveData) { utils::Check(data_size == size, - "Allreduce Recovered data size do not match the specification of function call\n"\ - "Please check if calling sequence of recovered program is the same the original one in current VersionNumber"); + "Allreduce Recovered data size do not match the specification of function call.\n"\ + "Please check if calling sequence of recovered program is the " \ + "same the original one in current VersionNumber"); } return TryRecoverData(role, sendrecvbuf, data_size, recv_link, req_in); } @@ -766,7 +778,7 @@ bool 
AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { while (true) { this->ReportStatus(); // action - ActionSummary act = req; + ActionSummary act = req; // get the reduced action if (!CheckAndRecover(TryAllreduce(&act, sizeof(act), 1, ActionSummary::Reducer))) continue; if (act.check_ack()) { @@ -816,7 +828,8 @@ bool AllreduceRobust::RecoverExec(void *buf, size_t size, int flag, int seqno) { if (!CheckAndRecover(TryGetResult(buf, size, act.min_seqno(), requester))) continue; if (requester) return true; } else { - // all the request is same, this is most recent command that is yet to be executed + // all the request is same, + // this is most recent command that is yet to be executed return false; } } @@ -855,7 +868,8 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, utils::Assert(chkpt.length() == 0, "local chkpt space inconsistent"); } const int n = num_local_replica; - {// backward passing, passing state in backward direction of the ring + { + // backward passing, passing state in backward direction of the ring const int nlocal = static_cast(rptr.size() - 1); utils::Assert(nlocal <= n + 1, "invalid local replica"); std::vector msg_back(n + 1); @@ -897,10 +911,10 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, // update rptr rptr.resize(nread_end + 1); for (int i = nlocal; i < nread_end; ++i) { - rptr[i + 1] = rptr[i] + sizes[i]; + rptr[i + 1] = rptr[i] + sizes[i]; } chkpt.resize(rptr.back()); - // pass data through the link + // pass data through the link succ = RingPassing(BeginPtr(chkpt), rptr[nlocal], rptr[nread_end], rptr[nwrite_start], rptr[nread_end], ring_next, ring_prev); @@ -908,7 +922,8 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, rptr.resize(nlocal + 1); chkpt.resize(rptr.back()); return succ; } } - {// forward passing, passing state in forward direction of the ring + { + // forward passing, passing state in forward direction of the ring const int nlocal = 
static_cast(rptr.size() - 1); utils::Assert(nlocal <= n + 1, "invalid local replica"); std::vector msg_forward(n + 1); @@ -926,7 +941,7 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, 1 * sizeof(int), 2 * sizeof(int), 0 * sizeof(int), 1 * sizeof(int), ring_next, ring_prev); - if (succ != kSuccess) return succ; + if (succ != kSuccess) return succ; // calculate the number of things we can read from next link int nread_end = nlocal, nwrite_end = 1; // have to have itself in order to get other data from prev link @@ -936,7 +951,7 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, nread_end = std::max(nread_end, i + 1); nwrite_end = i + 1; } - if (nwrite_end > n) nwrite_end = n; + if (nwrite_end > n) nwrite_end = n; } else { nread_end = 0; nwrite_end = 0; } @@ -963,7 +978,7 @@ AllreduceRobust::TryRecoverLocalState(std::vector *p_local_rptr, rptr[i + 1] = rptr[i] + sizes[i]; } chkpt.resize(rptr.back()); - // pass data through the link + // pass data through the link succ = RingPassing(BeginPtr(chkpt), rptr[nlocal], rptr[nread_end], rptr[nwrite_start], rptr[nwrite_end], ring_prev, ring_next); @@ -995,7 +1010,8 @@ AllreduceRobust::TryCheckinLocalState(std::vector *p_local_rptr, if (num_local_replica == 0) return kSuccess; std::vector &rptr = *p_local_rptr; std::string &chkpt = *p_local_chkpt; - utils::Assert(rptr.size() == 2, "TryCheckinLocalState must have exactly 1 state"); + utils::Assert(rptr.size() == 2, + "TryCheckinLocalState must have exactly 1 state"); const int n = num_local_replica; std::vector sizes(n + 1); sizes[0] = rptr[1] - rptr[0]; @@ -1012,9 +1028,9 @@ AllreduceRobust::TryCheckinLocalState(std::vector *p_local_rptr, rptr.resize(n + 2); for (int i = 1; i <= n; ++i) { rptr[i + 1] = rptr[i] + sizes[i]; - } + } chkpt.resize(rptr.back()); - // pass data through the link + // pass data through the link succ = RingPassing(BeginPtr(chkpt), rptr[1], rptr[n + 1], rptr[0], rptr[n], @@ -1050,13 +1066,14 @@ 
AllreduceRobust::RingPassing(void *sendrecvbuf_, LinkRecord *read_link, LinkRecord *write_link) { if (read_link == NULL || write_link == NULL || read_end == 0) return kSuccess; - utils::Assert(write_end <= read_end, "RingPassing: boundary check1, write_end=%lu, read_end=%lu", write_end, read_end); + utils::Assert(write_end <= read_end, + "RingPassing: boundary check1"); utils::Assert(read_ptr <= read_end, "RingPassing: boundary check2"); utils::Assert(write_ptr <= write_end, "RingPassing: boundary check3"); // take reference LinkRecord &prev = *read_link, &next = *write_link; // send recv buffer - char *buf = reinterpret_cast(sendrecvbuf_); + char *buf = reinterpret_cast(sendrecvbuf_); while (true) { bool finished = true; utils::SelectHelper selecter; @@ -1066,7 +1083,7 @@ AllreduceRobust::RingPassing(void *sendrecvbuf_, } if (write_ptr < read_ptr && write_ptr != write_end) { selecter.WatchWrite(next.sock); - finished = false; + finished = false; } selecter.WatchException(prev.sock); selecter.WatchException(next.sock); @@ -1078,7 +1095,7 @@ AllreduceRobust::RingPassing(void *sendrecvbuf_, ssize_t len = prev.sock.Recv(buf + read_ptr, read_end - read_ptr); if (len == 0) { prev.sock.Close(); return kSockError; - } + } if (len != -1) { read_ptr += static_cast(len); } else { diff --git a/src/allreduce_robust.h b/src/allreduce_robust.h index d178e391a..f2a804e95 100644 --- a/src/allreduce_robust.h +++ b/src/allreduce_robust.h @@ -1,4 +1,5 @@ /*! + * Copyright (c) 2014 by Contributors * \file allreduce_robust.h * \brief Robust implementation of Allreduce * using TCP non-block socket and tree-shape reduction. 
@@ -7,10 +8,12 @@ * * \author Tianqi Chen, Ignacio Cano, Tianyi Zhou */ -#ifndef RABIT_ALLREDUCE_ROBUST_H -#define RABIT_ALLREDUCE_ROBUST_H +#ifndef RABIT_ALLREDUCE_ROBUST_H_ +#define RABIT_ALLREDUCE_ROBUST_H_ #include -#include +#include +#include +#include "rabit/engine.h" #include "./allreduce_base.h" namespace rabit { @@ -111,11 +114,11 @@ class AllreduceRobust : public AllreduceBase { private: // constant one byte out of band message to indicate error happening // and mark for channel cleanup - const static char kOOBReset = 95; + static const char kOOBReset = 95; // and mark for channel cleanup, after OOB signal - const static char kResetMark = 97; + static const char kResetMark = 97; // and mark for channel cleanup - const static char kResetAck = 97; + static const char kResetAck = 97; /*! \brief type of roles each node can play during recovery */ enum RecoverType { /*! \brief current node have data */ @@ -132,29 +135,29 @@ class AllreduceRobust : public AllreduceBase { */ struct ActionSummary { // maximumly allowed sequence id - const static int kSpecialOp = (1 << 26); + static const int kSpecialOp = (1 << 26); // special sequence number for local state checkpoint - const static int kLocalCheckPoint = (1 << 26) - 2; + static const int kLocalCheckPoint = (1 << 26) - 2; // special sequnce number for local state checkpoint ack signal - const static int kLocalCheckAck = (1 << 26) - 1; + static const int kLocalCheckAck = (1 << 26) - 1; //--------------------------------------------- - // The following are bit mask of flag used in + // The following are bit mask of flag used in //---------------------------------------------- // some node want to load check point - const static int kLoadCheck = 1; + static const int kLoadCheck = 1; // some node want to do check point - const static int kCheckPoint = 2; + static const int kCheckPoint = 2; // check point Ack, we use a two phase message in check point, // this is the second phase of check pointing - const static int 
kCheckAck = 4; + static const int kCheckAck = 4; // there are difference sequence number the nodes proposed // this means we want to do recover execution of the lower sequence // action instead of normal execution - const static int kDiffSeq = 8; + static const int kDiffSeq = 8; // constructor ActionSummary(void) {} - // constructor of action - ActionSummary(int flag, int minseqno = kSpecialOp) { + // constructor of action + explicit ActionSummary(int flag, int minseqno = kSpecialOp) { seqcode = (minseqno << 4) | flag; } // minimum number of all operations @@ -181,10 +184,11 @@ class AllreduceRobust : public AllreduceBase { inline int flag(void) const { return seqcode & 15; } - // reducer for Allreduce, used to get the result ActionSummary from all nodes - inline static void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) { + // reducer for Allreduce, get the result ActionSummary from all nodes + inline static void Reducer(const void *src_, void *dst_, + int len, const MPI::Datatype &dtype) { const ActionSummary *src = (const ActionSummary*)src_; - ActionSummary *dst = (ActionSummary*)dst_; + ActionSummary *dst = reinterpret_cast(dst_); for (int i = 0; i < len; ++i) { int src_seqno = src[i].min_seqno(); int dst_seqno = dst[i].min_seqno(); @@ -192,7 +196,8 @@ class AllreduceRobust : public AllreduceBase { if (src_seqno == dst_seqno) { dst[i] = ActionSummary(flag, src_seqno); } else { - dst[i] = ActionSummary(flag | kDiffSeq, std::min(src_seqno, dst_seqno)); + dst[i] = ActionSummary(flag | kDiffSeq, + std::min(src_seqno, dst_seqno)); } } } @@ -222,7 +227,7 @@ class AllreduceRobust : public AllreduceBase { data_.resize(rptr_.back() + nhop); return BeginPtr(data_) + rptr_.back(); } - // push the result in temp to the + // push the result in temp to the inline void PushTemp(int seqid, size_t type_nbytes, size_t count) { size_t size = type_nbytes * count; size_t nhop = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); @@ -234,13 +239,14 @@ 
class AllreduceRobust : public AllreduceBase { size_.push_back(size); utils::Assert(data_.size() == rptr_.back(), "PushTemp inconsistent"); } - // return the stored result of seqid, if any + // return the stored result of seqid, if any inline void* Query(int seqid, size_t *p_size) { - size_t idx = std::lower_bound(seqno_.begin(), seqno_.end(), seqid) - seqno_.begin(); + size_t idx = std::lower_bound(seqno_.begin(), + seqno_.end(), seqid) - seqno_.begin(); if (idx == seqno_.size() || seqno_[idx] != seqid) return NULL; *p_size = size_[idx]; return BeginPtr(data_) + rptr_[idx]; - } + } // drop last stored result inline void DropLast(void) { utils::Assert(seqno_.size() != 0, "there is nothing to be dropped"); @@ -254,15 +260,16 @@ class AllreduceRobust : public AllreduceBase { if (seqno_.size() == 0) return -1; return seqno_.back(); } + private: - // sequence number of each + // sequence number of each std::vector seqno_; // pointer to the positions std::vector rptr_; // actual size of each buffer std::vector size_; // content of the buffer - std::vector data_; + std::vector data_; }; /*! 
* \brief reset the all the existing links by sending Out-of-Band message marker @@ -291,14 +298,16 @@ class AllreduceRobust : public AllreduceBase { * \param buf the buffer to store the result * \param size the total size of the buffer * \param flag flag information about the action \sa ActionSummary - * \param seqno sequence number of the action, if it is special action with flag set, seqno needs to be set to ActionSummary::kSpecialOp + * \param seqno sequence number of the action, if it is special action with flag set, + * seqno needs to be set to ActionSummary::kSpecialOp * * \return if this function can return true or false - * - true means buf already set to the - * result by recovering procedure, the action is complete, no further action is needed + * - true means buf already set to the + * result by recovering procedure, the action is complete, no further action is needed * - false means this is the lastest action that has not yet been executed, need to execute the action */ - bool RecoverExec(void *buf, size_t size, int flag, int seqno = ActionSummary::kSpecialOp); + bool RecoverExec(void *buf, size_t size, int flag, + int seqno = ActionSummary::kSpecialOp); /*! * \brief try to load check point * @@ -363,7 +372,7 @@ class AllreduceRobust : public AllreduceBase { void *sendrecvbuf_, size_t size, int recv_link, - const std::vector &req_in); + const std::vector &req_in); /*! 
* \brief try to recover the local state, making each local state to be the result of itself * plus replication of states in previous num_local_replica hops in the ring @@ -446,17 +455,17 @@ o * the input state must exactly one saved state(local state of current node) inline ReturnType MsgPassing(const NodeType &node_value, std::vector *p_edge_in, std::vector *p_edge_out, - EdgeType (*func) (const NodeType &node_value, - const std::vector &edge_in, - size_t out_index) - ); + EdgeType (*func) + (const NodeType &node_value, + const std::vector &edge_in, + size_t out_index)); //---- recovery data structure ---- // the round of result buffer, used to mode the result int result_buffer_round; // result buffer of all reduce ResultBuffer resbuf; // last check point global model - std::string global_checkpoint; + std::string global_checkpoint; // number of replica for local state/model int num_local_replica; // --- recovery data structure for local checkpoint @@ -465,16 +474,15 @@ o * the input state must exactly one saved state(local state of current node) // pointer to memory position in the local model // local model is stored in CSR format(like a sparse matrices) // local_model[rptr[0]:rptr[1]] stores the model of current node - // local_model[rptr[k]:rptr[k+1]] stores the model of node in previous k hops in the ring + // local_model[rptr[k]:rptr[k+1]] stores the model of node in previous k hops std::vector local_rptr[2]; // storage for local model replicas std::string local_chkpt[2]; // version of local checkpoint can be 1 or 0 - int local_chkpt_version; + int local_chkpt_version; }; } // namespace engine } // namespace rabit // implementation of inline template function #include "./allreduce_robust-inl.h" - -#endif // RABIT_ALLREDUCE_ROBUST_H +#endif // RABIT_ALLREDUCE_ROBUST_H_ diff --git a/src/engine.cc b/src/engine.cc index 57e074109..45bef329c 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -1,4 +1,5 @@ /*! 
+ * Copyright (c) 2014 by Contributors * \file engine.cc * \brief this file governs which implementation of engine we are actually using * provides an singleton of engine interface @@ -41,16 +42,17 @@ void Finalize(void) { IEngine *GetEngine(void) { return &manager; } -// perform in-place allreduce, on sendrecvbuf +// perform in-place allreduce, on sendrecvbuf void Allreduce_(void *sendrecvbuf, size_t type_nbytes, size_t count, - IEngine::ReduceFunction red, + IEngine::ReduceFunction red, mpi::DataType dtype, mpi::OpType op, IEngine::PreprocFunction prepare_fun, void *prepare_arg) { - GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun, prepare_arg); + GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, + red, prepare_fun, prepare_arg); } // code for reduce handle diff --git a/src/engine_empty.cc b/src/engine_empty.cc index ff838717e..0c7020914 100644 --- a/src/engine_empty.cc +++ b/src/engine_empty.cc @@ -1,4 +1,5 @@ /*! + * Copyright (c) 2014 by Contributors * \file engine_empty.cc * \brief this file provides a dummy implementation of engine that does nothing * this file provides a way to fall back to single node program without causing too many dependencies @@ -25,9 +26,10 @@ class EmptyEngine : public IEngine { ReduceFunction reducer, PreprocFunction prepare_fun, void *prepare_arg) { - utils::Error("EmptyEngine:: Allreduce is not supported, use Allreduce_ instead"); + utils::Error("EmptyEngine:: Allreduce is not supported,"\ + "use Allreduce_ instead"); } - virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) { + virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) { } virtual void InitAfterException(void) { utils::Error("EmptyEngine is not fault tolerant"); @@ -51,7 +53,7 @@ class EmptyEngine : public IEngine { virtual int GetWorldSize(void) const { return 1; } - /*! \brief get the host name of current node */ + /*! 
\brief get the host name of current node */ virtual std::string GetHost(void) const { return std::string(""); } @@ -59,6 +61,7 @@ class EmptyEngine : public IEngine { // simply print information into the tracker utils::Printf("%s", msg.c_str()); } + private: int version_number; }; @@ -77,11 +80,11 @@ void Finalize(void) { IEngine *GetEngine(void) { return &manager; } -// perform in-place allreduce, on sendrecvbuf +// perform in-place allreduce, on sendrecvbuf void Allreduce_(void *sendrecvbuf, size_t type_nbytes, size_t count, - IEngine::ReduceFunction red, + IEngine::ReduceFunction red, mpi::DataType dtype, mpi::OpType op, IEngine::PreprocFunction prepare_fun, diff --git a/src/engine_mock.cc b/src/engine_mock.cc index e8a77a6a2..24415a1d5 100644 --- a/src/engine_mock.cc +++ b/src/engine_mock.cc @@ -1,4 +1,5 @@ /*! + * Copyright (c) 2014 by Contributors * \file engine_mock.cc * \brief this is an engine implementation that will * insert failures in certain call point, to test if the engine is robust to failure diff --git a/src/engine_mpi.cc b/src/engine_mpi.cc index d8a30cbbc..bdad5e2d1 100644 --- a/src/engine_mpi.cc +++ b/src/engine_mpi.cc @@ -1,4 +1,5 @@ /*! 
+ * Copyright (c) 2014 by Contributors * \file engine_mpi.cc * \brief this file gives an implementation of engine interface using MPI, * this will allow rabit program to run with MPI, but do not comes with fault tolerant @@ -8,10 +9,10 @@ #define _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_DEPRECATE #define NOMINMAX -#include -#include -#include #include +#include +#include "rabit/engine.h" +#include "rabit/utils.h" namespace rabit { namespace engine { @@ -27,9 +28,10 @@ class MPIEngine : public IEngine { ReduceFunction reducer, PreprocFunction prepare_fun, void *prepare_arg) { - utils::Error("MPIEngine:: Allreduce is not supported, use Allreduce_ instead"); + utils::Error("MPIEngine:: Allreduce is not supported,"\ + "use Allreduce_ instead"); } - virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) { + virtual void Broadcast(void *sendrecvbuf_, size_t size, int root) { MPI::COMM_WORLD.Bcast(sendrecvbuf_, size, MPI::CHAR, root); } virtual void InitAfterException(void) { @@ -48,13 +50,13 @@ class MPIEngine : public IEngine { } /*! \brief get rank of current node */ virtual int GetRank(void) const { - return MPI::COMM_WORLD.Get_rank(); + return MPI::COMM_WORLD.Get_rank(); } /*! \brief get total number of */ virtual int GetWorldSize(void) const { - return MPI::COMM_WORLD.Get_size(); + return MPI::COMM_WORLD.Get_size(); } - /*! \brief get the host name of current node */ + /*! 
\brief get the host name of current node */ virtual std::string GetHost(void) const { int len; char name[MPI_MAX_PROCESSOR_NAME]; @@ -68,6 +70,7 @@ class MPIEngine : public IEngine { utils::Printf("%s", msg.c_str()); } } + private: int version_number; }; @@ -91,7 +94,7 @@ IEngine *GetEngine(void) { // transform enum to MPI data type inline MPI::Datatype GetType(mpi::DataType dtype) { using namespace mpi; - switch(dtype) { + switch (dtype) { case kInt: return MPI::INT; case kUInt: return MPI::UNSIGNED; case kFloat: return MPI::FLOAT; @@ -103,7 +106,7 @@ inline MPI::Datatype GetType(mpi::DataType dtype) { // transform enum to MPI OP inline MPI::Op GetOp(mpi::OpType otype) { using namespace mpi; - switch(otype) { + switch (otype) { case kMax: return MPI::MAX; case kMin: return MPI::MIN; case kSum: return MPI::SUM; @@ -112,17 +115,18 @@ inline MPI::Op GetOp(mpi::OpType otype) { utils::Error("unknown mpi::OpType"); return MPI::MAX; } -// perform in-place allreduce, on sendrecvbuf +// perform in-place allreduce, on sendrecvbuf void Allreduce_(void *sendrecvbuf, size_t type_nbytes, size_t count, - IEngine::ReduceFunction red, + IEngine::ReduceFunction red, mpi::DataType dtype, mpi::OpType op, IEngine::PreprocFunction prepare_fun, void *prepare_arg) { if (prepare_fun != NULL) prepare_fun(prepare_arg); - MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, GetType(dtype), GetOp(op)); + MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, + count, GetType(dtype), GetOp(op)); } // code for reduce handle @@ -152,7 +156,7 @@ void ReduceHandle::Init(IEngine::ReduceFunction redfunc, size_t type_nbytes) { created_type_nbytes_ = type_nbytes; htype_ = dtype; } - + MPI::Op *op = new MPI::Op(); MPI::User_function *pf = redfunc; op->Init(pf, true); @@ -175,7 +179,7 @@ void ReduceHandle::Allreduce(void *sendrecvbuf, dtype->Commit(); created_type_nbytes_ = type_nbytes; } - if (prepare_fun != NULL) prepare_fun(prepare_arg); + if (prepare_fun != NULL) prepare_fun(prepare_arg); 
MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, *dtype, *op); } } // namespace engine diff --git a/src/socket.h b/src/socket.h index 29d62db35..c40cb6a88 100644 --- a/src/socket.h +++ b/src/socket.h @@ -1,10 +1,11 @@ -#ifndef RABIT_SOCKET_H -#define RABIT_SOCKET_H /*! + * Copyright (c) 2014 by Contributors * \file socket.h * \brief this file aims to provide a wrapper of sockets * \author Tianqi Chen */ +#ifndef RABIT_SOCKET_H_ +#define RABIT_SOCKET_H_ #if defined(_WIN32) #include #include @@ -21,7 +22,7 @@ #endif #include #include -#include +#include "rabit/utils.h" #if defined(_WIN32) typedef int ssize_t; @@ -68,9 +69,11 @@ struct SockAddr { inline std::string AddrStr(void) const { std::string buf; buf.resize(256); #ifdef _WIN32 - const char *s = inet_ntop(AF_INET, (PVOID)&addr.sin_addr, &buf[0], buf.length()); + const char *s = inet_ntop(AF_INET, (PVOID)&addr.sin_addr, + &buf[0], buf.length()); #else - const char *s = inet_ntop(AF_INET, &addr.sin_addr, &buf[0], buf.length()); + const char *s = inet_ntop(AF_INET, &addr.sin_addr, + &buf[0], buf.length()); #endif Assert(s != NULL, "cannot decode address"); return std::string(s); @@ -94,12 +97,12 @@ class Socket { */ inline static void Startup(void) { #ifdef _WIN32 - WSADATA wsa_data; + WSADATA wsa_data; if (WSAStartup(MAKEWORD(2, 2), &wsa_data) != -1) { - Socket::Error("Startup"); - } + Socket::Error("Startup"); + } if (LOBYTE(wsa_data.wVersion) != 2 || HIBYTE(wsa_data.wVersion) != 2) { - WSACleanup(); + WSACleanup(); utils::Error("Could not find a usable version of Winsock.dll\n"); } #endif @@ -118,11 +121,11 @@ class Socket { * it will set it back to block mode */ inline void SetNonBlock(bool non_block) { -#ifdef _WIN32 - u_long mode = non_block ? 1 : 0; - if (ioctlsocket(sockfd, FIONBIO, &mode) != NO_ERROR) { +#ifdef _WIN32 + u_long mode = non_block ? 
1 : 0; + if (ioctlsocket(sockfd, FIONBIO, &mode) != NO_ERROR) { Socket::Error("SetNonBlock"); - } + } #else int flag = fcntl(sockfd, F_GETFL, 0); if (flag == -1) { @@ -143,7 +146,8 @@ class Socket { * \param addr */ inline void Bind(const SockAddr &addr) { - if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == -1) { + if (bind(sockfd, reinterpret_cast(&addr.addr), + sizeof(addr.addr)) == -1) { Socket::Error("Bind"); } } @@ -154,10 +158,11 @@ class Socket { * \return the port successfully bind to, return -1 if failed to bind any port */ inline int TryBindHost(int start_port, int end_port) { - // TODO, add prefix check + // TODO(tqchen) add prefix check for (int port = start_port; port < end_port; ++port) { SockAddr addr("0.0.0.0", port); - if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == 0) { + if (bind(sockfd, reinterpret_cast(&addr.addr), + sizeof(addr.addr)) == 0) { return port; } if (errno != EADDRINUSE) { @@ -179,22 +184,22 @@ class Socket { inline bool BadSocket(void) const { if (IsClosed()) return true; int err = GetSockError(); - if (err == EBADF || err == EINTR) return true; + if (err == EBADF || err == EINTR) return true; return false; } /*! \brief check if socket is already closed */ inline bool IsClosed(void) const { return sockfd == INVALID_SOCKET; - } + } /*! 
\brief close the socket */ inline void Close(void) { if (sockfd != INVALID_SOCKET) { #ifdef _WIN32 closesocket(sockfd); #else - close(sockfd); + close(sockfd); #endif - sockfd = INVALID_SOCKET; + sockfd = INVALID_SOCKET; } else { Error("Socket::Close double close the socket or close without create"); } @@ -204,6 +209,7 @@ class Socket { int errsv = errno; utils::Error("Socket %s Error:%s", msg, strerror(errsv)); } + protected: explicit Socket(SOCKET sockfd) : sockfd(sockfd) { } @@ -227,7 +233,7 @@ class TCPSocket : public Socket{ int opt = static_cast(keepalive); if (setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &opt, sizeof(opt)) < 0) { Socket::Error("SetKeepAlive"); - } + } } /*! * \brief create the socket, call this before using socket @@ -273,7 +279,8 @@ class TCPSocket : public Socket{ * \return whether connect is successful */ inline bool Connect(const SockAddr &addr) { - return connect(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == 0; + return connect(sockfd, reinterpret_cast(&addr.addr), + sizeof(addr.addr)) == 0; } /*! * \brief send data using the socket @@ -284,7 +291,7 @@ class TCPSocket : public Socket{ * return -1 if error occurs */ inline ssize_t Send(const void *buf_, size_t len, int flag = 0) { - const char *buf = reinterpret_cast(buf_); + const char *buf = reinterpret_cast(buf_); return send(sockfd, buf, static_cast(len), flag); } /*! @@ -296,7 +303,7 @@ class TCPSocket : public Socket{ * return -1 if error occurs */ inline ssize_t Recv(void *buf_, size_t len, int flags = 0) { - char *buf = reinterpret_cast(buf_); + char *buf = reinterpret_cast(buf_); return recv(sockfd, buf, static_cast(len), flags); } /*! 
@@ -331,7 +338,8 @@ class TCPSocket : public Socket{ char *buf = reinterpret_cast(buf_); size_t ndone = 0; while (ndone < len) { - ssize_t ret = recv(sockfd, buf, static_cast(len - ndone), MSG_WAITALL); + ssize_t ret = recv(sockfd, buf, + static_cast(len - ndone), MSG_WAITALL); if (ret == -1) { if (errno == EAGAIN || errno == EWOULDBLOCK) return ndone; Socket::Error("RecvAll"); @@ -385,7 +393,7 @@ struct SelectHelper { * \param fd file descriptor to be watched */ inline void WatchRead(SOCKET fd) { - FD_SET(fd, &read_set); + FD_SET(fd, &read_set); if (fd > maxfd) maxfd = fd; } /*! @@ -403,7 +411,7 @@ struct SelectHelper { inline void WatchException(SOCKET fd) { FD_SET(fd, &except_set); if (fd > maxfd) maxfd = fd; - } + } /*! * \brief Check if the descriptor is ready for read * \param fd file descriptor to check status @@ -435,8 +443,9 @@ struct SelectHelper { fd_set wait_set; FD_ZERO(&wait_set); FD_SET(fd, &wait_set); - return Select_(static_cast(fd + 1), NULL, NULL, &wait_set, timeout); - } + return Select_(static_cast(fd + 1), + NULL, NULL, &wait_set, timeout); + } /*! 
* \brief peform select on the set defined * \param select_read whether to watch for read event @@ -454,9 +463,10 @@ struct SelectHelper { } return ret; } - + private: - inline static int Select_(int maxfd, fd_set *rfds, fd_set *wfds, fd_set *efds, long timeout) { + inline static int Select_(int maxfd, fd_set *rfds, + fd_set *wfds, fd_set *efds, long timeout) { utils::Assert(maxfd < FD_SETSIZE, "maxdf must be smaller than FDSETSIZE"); if (timeout == 0) { return select(maxfd, rfds, wfds, efds, NULL); @@ -465,12 +475,12 @@ struct SelectHelper { tm.tv_usec = (timeout % 1000) * 1000; tm.tv_sec = timeout / 1000; return select(maxfd, rfds, wfds, efds, &tm); - } + } } - - SOCKET maxfd; + + SOCKET maxfd; fd_set read_set, write_set, except_set; }; } // namespace utils } // namespace rabit -#endif +#endif // RABIT_SOCKET_H_ From d64d0ef1dced5055e16450e0824f004647c0d2f0 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 29 Dec 2014 06:11:58 -0800 Subject: [PATCH 150/531] cleanup submission script --- test/test.mk | 10 +++--- tracker/rabit_hadoop.py | 77 ++++++++++++++++++++++++++-------------- tracker/rabit_mpi.py | 43 ++++++++++++---------- tracker/rabit_tracker.py | 39 +++++++++++--------- 4 files changed, 101 insertions(+), 68 deletions(-) diff --git a/test/test.mk b/test/test.mk index 5f943103e..b1dddb0b4 100644 --- a/test/test.mk +++ b/test/test.mk @@ -10,17 +10,17 @@ endif local_recover: - ../tracker/rabit_mpi.py $(nslave) local test_local_recover $(ndata) rabit_local_replica=1 + ../tracker/rabit_mpi.py -n $(nslave) test_local_recover $(ndata) rabit_local_replica=1 local_recover_10_10k: - ../tracker/rabit_mpi.py 10 local test_local_recover 10000 rabit_local_replica=1 + ../tracker/rabit_mpi.py -n 10 test_local_recover 10000 rabit_local_replica=1 # this experiment test recovery with actually process exit, use keepalive to keep program alive model_recover_10_10k: - ../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 + 
../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 model_recover_10_10k_die_same: - ../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 + ../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 model_recover_10_10k_die_hard: - ../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0 + ../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0 diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index b86d91281..7bc855ec8 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -10,40 +10,63 @@ import time import subprocess import rabit_tracker as tracker -#!!! you can directly set hadoop binary path and hadoop streaming path here -hadoop_binary = 'hadoop' +#!!! 
Set path to hadoop and hadoop streaming jar here +hadoop_binary = None hadoop_streaming_jar = None -parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using hadoop streaming') -parser.add_argument('-s', '--nslaves', required=True, type=int, - help = "number of slaves proccess to be launched") -if hadoop_binary == None: - parser.add_argument('-hb', '--hadoop_binary', required=True, - help="path-to-hadoop binary folder") -if hadoop_streaming_jar == None: - parser.add_argument('-hs', '--hadoop_streaming_jar', required=True, - help='path-to hadoop streamimg jar file') -parser.add_argument('-i', '--input', required=True) -parser.add_argument('-o', '--output', required=True) -parser.add_argument('-m', '--mapper', required=True) -parser.add_argument('-a', '--args', required=True) -parser.add_argument('-f', '--file', required=True) -args = parser.parse_args() +# code +hadoop_home = os.getenv('HADOOP_HOME') +if hadoop_home != None: + if hadoop_binary == None: + hadoop_binary = hadoop_home + '/bin/hadoop' + if hadoop_streaming_jar == None: + hadoop_streaming_jar = hadoop_home + '/bin/hadoop' -if hadoop_binary != None: - args.hadoop_binary = hadoop_binary -if hadoop_streaming_jar != None: - args.hadoop_streaming_jar = hadoop_streaming_jar +if hadoop_binary == None or hadoop_streaming_jar == None: + print 'Warning: Cannot auto-detect path to hadoop and streaming jar, need to set them via arguments -hs and -hb' + print '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14' + +parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using Hadoop Streaming') +parser.add_argument('-n', '--nslaves', required=True, type=int, + help = 'number of slaves proccess to be launched') +parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, + help = 'print more messages into the console') +parser.add_argument('-i', '--input', required=True, + help = 'input path in 
HDFS') +parser.add_argument('-o', '--output', required=True, + help = 'output path in HDFS') +parser.add_argument('-f', '--files', nargs = '*', + help = 'the cached file list in mapreduce') +parser.add_argument('command', nargs='+', + help = 'command for rabit program') +if hadoop_binary == None: + parser.add_argument('-hb', '--hadoop_binary', required = True, + help="path-to-hadoop binary folder") +else: + parser.add_argument('-hb', '--hadoop_binary', default = hadoop_binary, + help="path-to-hadoop binary folder") + +if hadoop_streaming_jar == None: + parser.add_argument('-jar', '--hadoop_streaming_jar', required = True, + help='path-to hadoop streamimg jar file') +else: + parser.add_argument('-jar', '--hadoop_streaming_jar', default = hadoop_streaming_jar, + help='path-to hadoop streamimg jar file') +args = parser.parse_args() def hadoop_streaming(nslaves, slave_args): cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nslaves) cmd += ' -input %s -output %s' % (args.input, args.output) - cmd += ' -mapper \"%s %s %s\" -reducer \"/bin/cat\" ' % (args.mapper, args.args, ' '.join(slave_args)) - for f in args.file.split('#'): - cmd += ' -file %s' % (f) + cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) + fset = set() + if os.path.exists(args.command[0]): + fset.add(args.command[0]) + for flst in args.files: + for f in flst.split('#'): + fset.add(f) + for f in fset: + cmd += ' -file %s' % f print cmd subprocess.check_call(cmd, shell = True) -start = time.time() -tracker.submit(args.nslaves, [], fun_submit= hadoop_streaming) -print 'All run took %s' % (time.time() - start) +tracker.submit(args.nslaves, [], fun_submit = hadoop_streaming, verbose = args.verbose) diff --git a/tracker/rabit_mpi.py b/tracker/rabit_mpi.py index 3b2b68c54..d8aa968f5 100755 --- a/tracker/rabit_mpi.py +++ b/tracker/rabit_mpi.py @@ -1,21 +1,30 @@ #!/usr/bin/python """ -This is an example script to create a customized 
job submit with mpi -script using rabit engine +This is the demo submission script of rabit, it is created to +submit rabit jobs using hadoop streaming """ +import argparse import sys import os import subprocess -# import the tcp_master.py -# add path to sync -sys.path.append(os.path.dirname(__file__)+'/src/') import rabit_tracker as tracker +parser = argparse.ArgumentParser(description='Rabit script to submit rabit job using MPI') +parser.add_argument('-n', '--nslaves', required=True, type=int, + help = 'number of slaves proccess to be launched') +parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, + help = 'print more messages into the console') +parser.add_argument('-H', '--hostfile', type=str, + help = 'the hostfile of mpi server') +parser.add_argument('command', nargs='+', + help = 'command for rabit program') +args = parser.parse_args() # -# Note: this submit script is only used for example purpose -# It does not have to be mpirun, it can be any job submission script that starts the job, qsub, hadoop streaming etc. -# -def mpi_submit(nslave, args): +# Note: this submit script is only used for demo purpose +# It does not have to be mpirun, it can be any job submission +# script that starts the job, qsub, hadoop streaming etc. 
+# +def mpi_submit(nslave, slave_args): """ customized submit script, that submit nslave jobs, each must contain args as parameter note this can be a lambda function containing additional parameters in input @@ -24,17 +33,13 @@ def mpi_submit(nslave, args): args arguments to launch each job this usually includes the parameters of master_uri and parameters passed into submit """ - if args[0] == 'local': - cmd = ' '.join(['mpirun -n %d' % (nslave)] + args[1:]) + sargs = ' '.join(args.command + slave_args) + if args.hostfile is None: + cmd = ' '.join(['mpirun -n %d' % (nslave)] + args.command + slave_args) else: - cmd = ' '.join(['mpirun -n %d --hostfile %s' % (nslave, args[0])] + args[1:]) + ' '.join(['mpirun -n %d --hostfile %s' % (nslave, args.hostfile)] + args.command + slave_args) print cmd subprocess.check_call(cmd, shell = True) -if __name__ == '__main__': - if len(sys.argv) < 2: - print 'Usage: ' - print 'if == local, we will run using local mode' - exit(0) - # call submit, with nslave, the commands to run each job and submit function - tracker.submit(int(sys.argv[1]), sys.argv[2:], fun_submit= mpi_submit) +# call submit, with nslave, the commands to run each job and submit function +tracker.submit(args.nslaves, [], fun_submit = mpi_submit, verbose = args.verbose) diff --git a/tracker/rabit_tracker.py b/tracker/rabit_tracker.py index 0322edf5b..9823ef426 100644 --- a/tracker/rabit_tracker.py +++ b/tracker/rabit_tracker.py @@ -122,7 +122,7 @@ class SlaveEntry: return rmset class Tracker: - def __init__(self, port = 9091, port_end = 9999): + def __init__(self, port = 9091, port_end = 9999, verbose = True): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) for port in range(port, port_end): try: @@ -132,8 +132,9 @@ class Tracker: except socket.error: continue sock.listen(16) - self.sock = sock - print 'start listen on %s:%d' % (socket.gethostname(), self.port) + self.sock = sock + self.verbose = verbose + self.log_print('start listen on %s:%d' % 
(socket.gethostname(), self.port), 1) def __del__(self): self.sock.close() def slave_args(self): @@ -190,9 +191,12 @@ class Tracker: return ring_map def handle_print(self,slave, msg): sys.stdout.write(msg) - def log_print(self, msg): - sys.stderr.write(msg+'\n') - + def log_print(self, msg, level): + if level == 1: + if self.verbose: + sys.stderr.write(msg + '\n') + else: + sys.stderr.write(msg + '\n') def accept_slaves(self, nslave): # set of nodes that finishs the job shutdown = {} @@ -216,13 +220,12 @@ class Tracker: assert s.rank >= 0 and s.rank not in shutdown assert s.rank not in wait_conn shutdown[s.rank] = s - self.log_print('Recieve %s signal from %d' % (s.cmd, s.rank)) + self.log_print('Recieve %s signal from %d' % (s.cmd, s.rank), 1) continue - assert s.cmd == 'start' or s.cmd == 'recover' + assert s.cmd == 'start' or s.cmd == 'recover' # lazily initialize the slaves if tree_map == None: assert s.cmd == 'start' - print s.world_size if s.world_size > 0: nslave = s.world_size tree_map, parent_map = self.get_tree(nslave) @@ -239,18 +242,20 @@ class Tracker: assert len(todo_nodes) != 0 rank = todo_nodes.pop(0) if s.jobid != 'NULL': - job_map[s.jobid] = rank + job_map[s.jobid] = rank + if len(todo_nodes) == 0: + self.log_print('@tracker All of %d nodes getting started' % nslave, 2) s.assign_rank(rank, wait_conn, tree_map, parent_map, ring_map) - if s.cmd != 'start': - self.log_print('Recieve %s signal from %d' % (s.cmd, s.rank)) + if s.cmd != 'start': + self.log_print('Recieve %s signal from %d' % (s.cmd, s.rank), 1) else: - self.log_print('Recieve %s signal from %s assign rank %d' % (s.cmd, s.host, s.rank)) + self.log_print('Recieve %s signal from %s; assign rank %d' % (s.cmd, s.host, s.rank), 1) if s.wait_accept > 0: - wait_conn[rank] = s - self.log_print('All nodes finishes job') + wait_conn[rank] = s + self.log_print('@tracker All nodes finishes job', 2) -def submit(nslave, args, fun_submit): - master = Tracker() +def submit(nslave, args, fun_submit, 
verbose): + master = Tracker(verbose = verbose) submit_thread = Thread(target = fun_submit, args = (nslave, args + master.slave_args())) submit_thread.start() master.accept_slaves(nslave) From 491716c418f5effcd4b0aeb7b1c7630f8788dc95 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 29 Dec 2014 06:21:34 -0800 Subject: [PATCH 151/531] chg --- tracker/rabit_hadoop.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 7bc855ec8..977df254c 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -19,8 +19,10 @@ hadoop_home = os.getenv('HADOOP_HOME') if hadoop_home != None: if hadoop_binary == None: hadoop_binary = hadoop_home + '/bin/hadoop' + assert os.path.exists(hadoop_binary), "HADDOP_HOME does not contain the hadoop binary" if hadoop_streaming_jar == None: - hadoop_streaming_jar = hadoop_home + '/bin/hadoop' + hadoop_streaming_jar = hadoop_home + '/lib/hadoop-streaming.har' + assert os.path.exists(hadoop_streaming_jar), "HADDOP_HOME does not contain the haddop streaming jar" if hadoop_binary == None or hadoop_streaming_jar == None: print 'Warning: Cannot auto-detect path to hadoop and streaming jar, need to set them via arguments -hs and -hb' From c731e82fae3e72649bd25a3cb643212f0360b583 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 29 Dec 2014 06:37:07 -0800 Subject: [PATCH 152/531] add command --- tracker/rabit_hadoop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 977df254c..b57bc1327 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -39,8 +39,6 @@ parser.add_argument('-o', '--output', required=True, help = 'output path in HDFS') parser.add_argument('-f', '--files', nargs = '*', help = 'the cached file list in mapreduce') -parser.add_argument('command', nargs='+', - help = 'command for rabit program') if hadoop_binary == None: parser.add_argument('-hb', '--hadoop_binary', 
required = True, help="path-to-hadoop binary folder") @@ -54,6 +52,8 @@ if hadoop_streaming_jar == None: else: parser.add_argument('-jar', '--hadoop_streaming_jar', default = hadoop_streaming_jar, help='path-to hadoop streamimg jar file') +parser.add_argument('command', nargs='+', + help = 'command for rabit program') args = parser.parse_args() def hadoop_streaming(nslaves, slave_args): From b1340bf3106852d24567402042dd70a496915dfa Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 29 Dec 2014 06:50:17 -0800 Subject: [PATCH 153/531] add auto cache --- tracker/rabit_hadoop.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index b57bc1327..21e59115f 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -33,6 +33,8 @@ parser.add_argument('-n', '--nslaves', required=True, type=int, help = 'number of slaves proccess to be launched') parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, help = 'print more messages into the console') +parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type=int, + help = 'whether automatically cache the files in the command list to hadoop localfile') parser.add_argument('-i', '--input', required=True, help = 'input path in HDFS') parser.add_argument('-o', '--output', required=True, @@ -61,8 +63,10 @@ def hadoop_streaming(nslaves, slave_args): cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) fset = set() - if os.path.exists(args.command[0]): - fset.add(args.command[0]) + if args.auto_file_cache: + for f in args.command: + if os.path.exists(f): + fset.add(f) for flst in args.files: for f in flst.split('#'): fset.add(f) From 76abd80cb7b05fec5bb5fd10d78910f55957cc30 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 29 Dec 2014 18:17:20 -0800 Subject: [PATCH 154/531] change indentation --- tracker/rabit_hadoop.py | 
68 +++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 21e59115f..fbed6ad8f 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -17,16 +17,16 @@ hadoop_streaming_jar = None # code hadoop_home = os.getenv('HADOOP_HOME') if hadoop_home != None: - if hadoop_binary == None: - hadoop_binary = hadoop_home + '/bin/hadoop' - assert os.path.exists(hadoop_binary), "HADDOP_HOME does not contain the hadoop binary" - if hadoop_streaming_jar == None: - hadoop_streaming_jar = hadoop_home + '/lib/hadoop-streaming.har' - assert os.path.exists(hadoop_streaming_jar), "HADDOP_HOME does not contain the haddop streaming jar" + if hadoop_binary == None: + hadoop_binary = hadoop_home + '/bin/hadoop' + assert os.path.exists(hadoop_binary), "HADDOP_HOME does not contain the hadoop binary" + if hadoop_streaming_jar == None: + hadoop_streaming_jar = hadoop_home + '/lib/hadoop-streaming.har' + assert os.path.exists(hadoop_streaming_jar), "HADDOP_HOME does not contain the haddop streaming jar" if hadoop_binary == None or hadoop_streaming_jar == None: - print 'Warning: Cannot auto-detect path to hadoop and streaming jar, need to set them via arguments -hs and -hb' - print '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14' + print 'Warning: Cannot auto-detect path to hadoop and streaming jar, need to set them via arguments -hs and -hb' + print '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14' parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using Hadoop Streaming') parser.add_argument('-n', '--nslaves', required=True, type=int, @@ -34,45 +34,47 @@ parser.add_argument('-n', '--nslaves', required=True, type=int, parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, help = 'print more messages into the 
console') parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type=int, - help = 'whether automatically cache the files in the command list to hadoop localfile') + help = 'whether automatically cache the files in the command to hadoop localfile, this is on by defaultz') parser.add_argument('-i', '--input', required=True, help = 'input path in HDFS') parser.add_argument('-o', '--output', required=True, help = 'output path in HDFS') parser.add_argument('-f', '--files', nargs = '*', - help = 'the cached file list in mapreduce') + help = 'the cached file list in mapreduce,'\ + ' the submission script will automatically cache all the files which appears in command.'\ + ' you may need this option to cache additional files, or manually cache files when auto_file_cache is off') if hadoop_binary == None: - parser.add_argument('-hb', '--hadoop_binary', required = True, - help="path-to-hadoop binary folder") + parser.add_argument('-hb', '--hadoop_binary', required = True, + help="path-to-hadoop binary folder") else: - parser.add_argument('-hb', '--hadoop_binary', default = hadoop_binary, - help="path-to-hadoop binary folder") + parser.add_argument('-hb', '--hadoop_binary', default = hadoop_binary, + help="path-to-hadoop binary folder") if hadoop_streaming_jar == None: - parser.add_argument('-jar', '--hadoop_streaming_jar', required = True, - help='path-to hadoop streamimg jar file') + parser.add_argument('-jar', '--hadoop_streaming_jar', required = True, + help='path-to hadoop streamimg jar file') else: - parser.add_argument('-jar', '--hadoop_streaming_jar', default = hadoop_streaming_jar, - help='path-to hadoop streamimg jar file') + parser.add_argument('-jar', '--hadoop_streaming_jar', default = hadoop_streaming_jar, + help='path-to hadoop streamimg jar file') parser.add_argument('command', nargs='+', help = 'command for rabit program') args = parser.parse_args() def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -D mapred.map.tasks=%d' % 
(args.hadoop_binary, args.hadoop_streaming_jar, nslaves) - cmd += ' -input %s -output %s' % (args.input, args.output) - cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) - fset = set() - if args.auto_file_cache: - for f in args.command: - if os.path.exists(f): - fset.add(f) - for flst in args.files: - for f in flst.split('#'): - fset.add(f) - for f in fset: - cmd += ' -file %s' % f - print cmd - subprocess.check_call(cmd, shell = True) + cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nslaves) + cmd += ' -input %s -output %s' % (args.input, args.output) + cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) + fset = set() + if args.auto_file_cache: + for f in args.command: + if os.path.exists(f): + fset.add(f) + for flst in args.files: + for f in flst.split('#'): + fset.add(f) + for f in fset: + cmd += ' -file %s' % f + print cmd + subprocess.check_call(cmd, shell = True) tracker.submit(args.nslaves, [], fun_submit = hadoop_streaming, verbose = args.verbose) From 39504825d8233494eebb00357e3fb3d608037e26 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 29 Dec 2014 18:32:56 -0800 Subject: [PATCH 155/531] add kmeans example --- toolkit/Makefile | 9 ++++----- toolkit/kmeans.cpp | 4 +++- toolkit/kmeans_hadoop.sh | 2 +- toolkit/toolkit_util.h | 24 ++++++++++++++++-------- tracker/rabit_hadoop.py | 2 +- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/toolkit/Makefile b/toolkit/Makefile index 69819246a..646558a74 100644 --- a/toolkit/Makefile +++ b/toolkit/Makefile @@ -7,12 +7,11 @@ export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include # specify tensor path BIN = kmeans.rabit MOCKBIN= kmeans.mock +MPIBIN = kmeans.mpi # objectives that makes up rabit library OBJ = kmeans.o -MPIBIN = kmeans.mpi -.PHONY: clean all lib libmpi - -all: $(BIN) $(MOCKBIN) +.PHONY: clean all lib +all: $(BIN) lib: cd ..;make lib/librabit.a 
lib/librabit_mock.a; cd - @@ -38,4 +37,4 @@ $(MPIBIN) : $(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) -lrabit_mpi clean: - $(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~ + $(RM) $(OBJ) $(BIN) $(MPIBIN) $(MOCKBIN) *~ ../src/*~ diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cpp index e6be48fc0..0a8171f9f 100644 --- a/toolkit/kmeans.cpp +++ b/toolkit/kmeans.cpp @@ -83,7 +83,9 @@ inline size_t GetCluster(const Matrix ¢roids, int main(int argc, char *argv[]) { if (argc < 5) { - printf("Usage: num_cluster max_iter \n"); + if (rabit::GetRank() == 0) { + rabit::TrackerPrintf("Usage: num_cluster max_iter \n"); + } return 0; } clock_t tStart = clock(); diff --git a/toolkit/kmeans_hadoop.sh b/toolkit/kmeans_hadoop.sh index 9e7b3b832..fb8d1d5a2 100755 --- a/toolkit/kmeans_hadoop.sh +++ b/toolkit/kmeans_hadoop.sh @@ -6,4 +6,4 @@ then fi #set path to hadoop streaming jar here STREAMING_JAR= -python ../tracker/rabit_hadoop.py -hs $STREAMING_JAR -s $1 -i $2 -m kmeans.rabit --args "stdin "$3" "$4" stdout" -o $5 --file kmeans.rabit +python ../tracker/rabit_hadoop.py -hs $STREAMING_JAR -n $1 -i $2 -o $5 kmeans.rabit stdin $3 $4 stdout diff --git a/toolkit/toolkit_util.h b/toolkit/toolkit_util.h index a2f8f56ac..061d3e97b 100644 --- a/toolkit/toolkit_util.h +++ b/toolkit/toolkit_util.h @@ -29,7 +29,7 @@ struct SparseMat { v.length = static_cast(row_ptr[i + 1]-row_ptr[i]); return v; } - // load data from file + // load data from LibSVM format inline void Load(const char *fname) { FILE *fi; if (!strcmp(fname, "stdin")) { @@ -41,17 +41,25 @@ struct SparseMat { row_ptr.push_back(0); data.clear(); feat_dim = 0; - unsigned num_feat; - while (fscanf(fi, "%u", &num_feat) == 1) { + float label; bool init = true; + char tmp[1024]; + while (fscanf(file, "%s", tmp) == 1) { Entry e; - for (unsigned i = 0; i < num_feat; ++i) { - utils::Check(fscanf(fi, "%u:%f", &e.findex, &e.fvalue) == 2, - "invalid format"); + if (sscanf(tmp, "%u:%f", &e.findex, &e.fvalue) == 2) { 
data.push_back(e); feat_dim = std::max(e.findex, feat_dim); + } else { + if (!init) { + labels.push_back(label); + row_ptr.push_back(data.size()); + } + utils::Check(sscanf(tmp, "%f", &label) == 1, "invalid LibSVM format"); + init = false; } - row_ptr.push_back(data.size()); } + // last row + labels.push_back(label); + row_ptr.push_back(data.size()); feat_dim += 1; // close the filed if (fi != stdin) fclose(fi); @@ -63,6 +71,7 @@ struct SparseMat { unsigned feat_dim; std::vector row_ptr; std::vector data; + std::vector labels; }; // dense matrix struct Matrix { @@ -85,7 +94,6 @@ struct Matrix { } else { fo = utils::FopenCheck(fname, "w"); } - fprintf(fo, "%lu %lu\n", nrow, ncol); for (size_t i = 0; i < data.size(); ++i) { fprintf(fo, "%g", data[i]); if ((i+1) % ncol == 0) { diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index fbed6ad8f..0d2a33b90 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -11,7 +11,7 @@ import subprocess import rabit_tracker as tracker #!!! 
Set path to hadoop and hadoop streaming jar here -hadoop_binary = None +hadoop_binary = 'hadoop' hadoop_streaming_jar = None # code From bdfa1a0220778c72665aabd548951c2e40a29e57 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 29 Dec 2014 18:42:24 -0800 Subject: [PATCH 156/531] change nslave to nworker --- tracker/rabit_hadoop.py | 23 ++++++++++++++--------- tracker/rabit_mpi.py | 6 +++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 0d2a33b90..70f3aea9b 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -29,20 +29,21 @@ if hadoop_binary == None or hadoop_streaming_jar == None: print '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14' parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using Hadoop Streaming') -parser.add_argument('-n', '--nslaves', required=True, type=int, - help = 'number of slaves proccess to be launched') -parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, - help = 'print more messages into the console') -parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type=int, - help = 'whether automatically cache the files in the command to hadoop localfile, this is on by defaultz') +parser.add_argument('-n', '--nworker', required=True, type=int, + help = 'number of worker proccess to be launched') parser.add_argument('-i', '--input', required=True, help = 'input path in HDFS') parser.add_argument('-o', '--output', required=True, help = 'output path in HDFS') +parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, + help = 'print more messages into the console') +parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type=int, + help = 'whether automatically cache the files in the command to hadoop localfile, this is on by default') parser.add_argument('-f', '--files', nargs = '*', help = 
'the cached file list in mapreduce,'\ ' the submission script will automatically cache all the files which appears in command.'\ ' you may need this option to cache additional files, or manually cache files when auto_file_cache is off') +parser.add_argument('--jobname', help = 'customize jobname in tracker') if hadoop_binary == None: parser.add_argument('-hb', '--hadoop_binary', required = True, help="path-to-hadoop binary folder") @@ -60,8 +61,12 @@ parser.add_argument('command', nargs='+', help = 'command for rabit program') args = parser.parse_args() -def hadoop_streaming(nslaves, slave_args): - cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nslaves) +if args.jobname is None: + args.jobname = ('Rabit(nworker=%d):' % args.nworker) + args.command[0].split('/')[-1]; + +def hadoop_streaming(nworker, slave_args): + cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) + cmd += ' -D mapred.job.name=%d' % (a) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) fset = set() @@ -77,4 +82,4 @@ def hadoop_streaming(nslaves, slave_args): print cmd subprocess.check_call(cmd, shell = True) -tracker.submit(args.nslaves, [], fun_submit = hadoop_streaming, verbose = args.verbose) +tracker.submit(args.nworker, [], fun_submit = hadoop_streaming, verbose = args.verbose) diff --git a/tracker/rabit_mpi.py b/tracker/rabit_mpi.py index d8aa968f5..662c173bc 100755 --- a/tracker/rabit_mpi.py +++ b/tracker/rabit_mpi.py @@ -10,8 +10,8 @@ import subprocess import rabit_tracker as tracker parser = argparse.ArgumentParser(description='Rabit script to submit rabit job using MPI') -parser.add_argument('-n', '--nslaves', required=True, type=int, - help = 'number of slaves proccess to be launched') +parser.add_argument('-n', '--nworker', required=True, type=int, + help = 'number of worker proccess to be launched') 
parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, help = 'print more messages into the console') parser.add_argument('-H', '--hostfile', type=str, @@ -42,4 +42,4 @@ def mpi_submit(nslave, slave_args): subprocess.check_call(cmd, shell = True) # call submit, with nslave, the commands to run each job and submit function -tracker.submit(args.nslaves, [], fun_submit = mpi_submit, verbose = args.verbose) +tracker.submit(args.nworker, [], fun_submit = mpi_submit, verbose = args.verbose) From 1bcea65117cf9c53f3e90e92211019497182bfe3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Mon, 29 Dec 2014 18:44:30 -0800 Subject: [PATCH 157/531] change nslave to nworker --- tracker/rabit_hadoop.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 70f3aea9b..d25fae7c0 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -42,7 +42,8 @@ parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type= parser.add_argument('-f', '--files', nargs = '*', help = 'the cached file list in mapreduce,'\ ' the submission script will automatically cache all the files which appears in command.'\ - ' you may need this option to cache additional files, or manually cache files when auto_file_cache is off') + ' You may need this option to cache additional files.'\ + ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', help = 'customize jobname in tracker') if hadoop_binary == None: parser.add_argument('-hb', '--hadoop_binary', required = True, From bfb9aa3d77d30e5072bca3e87650142187a1623d Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 30 Dec 2014 04:37:50 -0800 Subject: [PATCH 158/531] add native script --- src/allreduce_mock.h | 3 ++- test/keepalive.sh | 14 ----------- test/test.mk | 10 ++++---- tracker/rabit_demo.py | 57 +++++++++++++++++++++++++++++++++++++++++++ tracker/rabit_mpi.py | 4 +-- 5 files changed, 65 
insertions(+), 23 deletions(-) delete mode 100755 test/keepalive.sh create mode 100755 tracker/rabit_demo.py diff --git a/src/allreduce_mock.h b/src/allreduce_mock.h index f46ee6885..96bc55800 100644 --- a/src/allreduce_mock.h +++ b/src/allreduce_mock.h @@ -85,7 +85,8 @@ class AllreduceMock : public AllreduceRobust { inline void Verify(const MockKey &key, const char *name) { if (mock_map.count(key) != 0) { num_trial += 1; - utils::Error("[%d]@@@Hit Mock Error:%s", rank, name); + fprintf(stderr, "[%d]@@@Hit Mock Error:%s\n", rank, name); + exit(-2); } } }; diff --git a/test/keepalive.sh b/test/keepalive.sh deleted file mode 100755 index c4df061a9..000000000 --- a/test/keepalive.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -if [ "$#" -lt 1 ]; -then - echo "Usage: program parameters" - echo "Repeatively run program until success" - exit -1 -fi -nrep=0 -echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK -until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep; do - sleep 1 - nrep=$((nrep+1)) - echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep -done diff --git a/test/test.mk b/test/test.mk index b1dddb0b4..b3525b6d4 100644 --- a/test/test.mk +++ b/test/test.mk @@ -10,17 +10,17 @@ endif local_recover: - ../tracker/rabit_mpi.py -n $(nslave) test_local_recover $(ndata) rabit_local_replica=1 + ../tracker/rabit_demo.py -n $(nslave) test_local_recover $(ndata) rabit_local_replica=1 local_recover_10_10k: - ../tracker/rabit_mpi.py -n 10 test_local_recover 10000 rabit_local_replica=1 + ../tracker/rabit_demo.py -n 10 test_local_recover 10000 rabit_local_replica=1 # this experiment test recovery with actually process exit, use keepalive to keep program alive model_recover_10_10k: - ../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 + ../tracker/rabit_demo.py -n 10 test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 model_recover_10_10k_die_same: - ../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 
10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 + ../tracker/rabit_demo.py -n 10 test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 model_recover_10_10k_die_hard: - ../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0 + ../tracker/rabit_demo.py -n 10 test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0 diff --git a/tracker/rabit_demo.py b/tracker/rabit_demo.py new file mode 100755 index 000000000..aeeb6e9a3 --- /dev/null +++ b/tracker/rabit_demo.py @@ -0,0 +1,57 @@ +#!/usr/bin/python +""" +This is the demo submission script of rabit, it is created to +submit rabit jobs using hadoop streaming +""" +import argparse +import sys +import os +import subprocess +from threading import Thread +import rabit_tracker as tracker + +parser = argparse.ArgumentParser(description='Rabit script to submit rabit job locally using python subprocess') +parser.add_argument('-n', '--nworker', required=True, type=int, + help = 'number of worker proccess to be launched') +parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, + help = 'print more messages into the console') +parser.add_argument('command', nargs='+', + help = 'command for rabit program') +args = parser.parse_args() + +def exec_cmd(cmd, taskid): + if cmd[0].find('/') == -1 and os.path.exists(cmd[0]): + cmd[0] = './' + cmd[0] + cmd = ' '.join(cmd) + ntrial = 0 + while True: + arg = ' rabit_task_id=%d rabit_num_trial=%d' % (taskid, ntrial) + ret = subprocess.call(cmd + arg, shell = True) + if ret == 254 or ret == -2: + ntrial += 1 + continue + if ret == 0: + return + raise Exception('Get nonzero return code=%d' % ret) +# +# Note: this submit script is only used for demo purpose +# submission script using pyhton multi-threading +# +def 
mthread_submit(nslave, slave_args): + """ + customized submit script, that submit nslave jobs, each must contain args as parameter + note this can be a lambda function containing additional parameters in input + Parameters + nslave number of slave process to start up + args arguments to launch each job + this usually includes the parameters of master_uri and parameters passed into submit + """ + procs = {} + for i in range(nslave): + procs[i] = Thread(target = exec_cmd, args = (args.command + slave_args, i)) + procs[i].start() + for i in range(nslave): + procs[i].join() + +# call submit, with nslave, the commands to run each job and submit function +tracker.submit(args.nworker, [], fun_submit = mthread_submit, verbose = args.verbose) diff --git a/tracker/rabit_mpi.py b/tracker/rabit_mpi.py index 662c173bc..604ed3bf7 100755 --- a/tracker/rabit_mpi.py +++ b/tracker/rabit_mpi.py @@ -20,9 +20,7 @@ parser.add_argument('command', nargs='+', help = 'command for rabit program') args = parser.parse_args() # -# Note: this submit script is only used for demo purpose -# It does not have to be mpirun, it can be any job submission -# script that starts the job, qsub, hadoop streaming etc. 
+# submission script using MPI # def mpi_submit(nslave, slave_args): """ From 06206e1d037e8bec405c87f58a5dc2214e898aae Mon Sep 17 00:00:00 2001 From: tqchen Date: Tue, 30 Dec 2014 06:22:54 -0800 Subject: [PATCH 159/531] start checkin guides --- guide/Makefile | 26 ++++++++++++++++++++++++ guide/README.md | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ guide/basic.cc | 25 +++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 guide/Makefile create mode 100644 guide/README.md create mode 100644 guide/basic.cc diff --git a/guide/Makefile b/guide/Makefile new file mode 100644 index 000000000..c8000aabc --- /dev/null +++ b/guide/Makefile @@ -0,0 +1,26 @@ +export CC = gcc +export CXX = g++ +export MPICXX = mpicxx +export LDFLAGS= -pthread -lm -L../lib +export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include + +.PHONY: clean all lib libmpi +BIN = basic.rabit +MOCKBIN= + +all: $(BIN) $(MOCKBIN) +basic.rabit: basic.cc lib + +$(BIN) : + $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit +$(MOCKBIN) : + $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mock + +$(OBJ) : + $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) + +$(MPIBIN) : + $(MPICXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS) -lrabit_mpi + +clean: + $(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~ \ No newline at end of file diff --git a/guide/README.md b/guide/README.md new file mode 100644 index 000000000..a41160123 --- /dev/null +++ b/guide/README.md @@ -0,0 +1,54 @@ +Tutorial of Rabit +===== +This is an tutorial of rabit, a Reliable Allreduce and Broadcast interface. +To run the examples locally, you will need to type ```make``` to build all the examples. + +**List of Topics** +* [What is Allreduce](#what-is-allreduce) +* [Common Usecase of Allreduce](#common-use-case) + +What is Allreduce +===== +The main method provided by rabit are Allreduce and Broadcast. 
Allreduce performs reduction across different computation nodes, +and returning the results to all the nodes. To understand the behavior of the function. Consider the following example in [basic.cc](basic.cc). +```c++ +#include +using namespace rabit; +const int N = 3; +int main(int argc, char *argv[]) { + int a[N]; + rabit::Init(argc, argv); + for (int i = 0; i < N; ++i) { + a[i] = rabit::GetRank() + i; + } + printf("@node[%d] before-allreduce: a={%d, %d, %d}\n", + rabit::GetRank(), a[0], a[1], a[2]); + // allreduce take max of each elements in all processes + Allreduce(&a[0], N); + printf("@node[%d] after-allreduce: a={%d, %d, %d}\n", + rabit::GetRank(), a[0], a[1], a[2]); + rabit::Finalize(); + return 0; +} +``` +You can run the example using the rabit_demo.py script. The following commmand +start rabit program with two worker processes. +```bash +../tracker/rabit_demo.py -n 2 basic.rabit +``` +This will start two process, one process with rank 0 and another rank 1, running the same code. +The ```rabit::GetRank()``` function return the rank of current process. + +Before the call the allreduce, process 0 contains array ```a = {0, 1, 2}```, while process 1 have array +```a = {1, 2, 3}```. After the call of Allreduce, the array contents in all processes are replaced by the +reduction result (in this case, the maximum value in each position across all the processes). So after the +Allreduce call, the result will become ```a={1, 2, 3}```. + +You can also run example with different processes by setting -n to different values, to see the outcomming result. +Rabit provides different reduction operators, for example, you can change ```op::Max``` to ```op::Sum```, to change +the reduction method from maximum to summation. + + +Common Use Case +===== + diff --git a/guide/basic.cc b/guide/basic.cc new file mode 100644 index 000000000..e7863b1fd --- /dev/null +++ b/guide/basic.cc @@ -0,0 +1,25 @@ +/*! 
+ * Copyright (c) 2014 by Contributors + * \file basic.cc + * \brief This is an example demonstrating what is Allreduce + * + * \author Tianqi Chen + */ +#include +using namespace rabit; +const int N = 3; +int main(int argc, char *argv[]) { + int a[N]; + rabit::Init(argc, argv); + for (int i = 0; i < N; ++i) { + a[i] = rabit::GetRank() + i; + } + printf("@node[%d] before-allreduce: a={%d, %d, %d}\n", + rabit::GetRank(), a[0], a[1], a[2]); + // allreduce take max of each elements in all processes + Allreduce(&a[0], N); + printf("@node[%d] after-allreduce: a={%d, %d, %d}\n", + rabit::GetRank(), a[0], a[1], a[2]); + rabit::Finalize(); + return 0; +} From 90a8505208ec70da9b530b211c5b390d1ce6665f Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 1 Jan 2015 05:42:03 -0800 Subject: [PATCH 160/531] update guide --- guide/Makefile | 3 +- guide/README.md | 119 ++++++++++++++++++++++++++++-- toolkit/Makefile | 2 +- toolkit/README.md | 5 ++ toolkit/{kmeans.cpp => kmeans.cc} | 0 toolkit/toolkit_util.h | 2 +- 6 files changed, 122 insertions(+), 9 deletions(-) rename toolkit/{kmeans.cpp => kmeans.cc} (100%) diff --git a/guide/Makefile b/guide/Makefile index c8000aabc..1770a90dd 100644 --- a/guide/Makefile +++ b/guide/Makefile @@ -5,11 +5,12 @@ export LDFLAGS= -pthread -lm -L../lib export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../include .PHONY: clean all lib libmpi -BIN = basic.rabit +BIN = basic.rabit broadcast.rabit MOCKBIN= all: $(BIN) $(MOCKBIN) basic.rabit: basic.cc lib +broadcast.rabit: broadcast.cc lib $(BIN) : $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit diff --git a/guide/README.md b/guide/README.md index a41160123..311681aac 100644 --- a/guide/README.md +++ b/guide/README.md @@ -1,11 +1,13 @@ Tutorial of Rabit ===== -This is an tutorial of rabit, a Reliable Allreduce and Broadcast interface. +This is an tutorial of rabit, a ***Reliable Allreduce and Broadcast interface***. 
To run the examples locally, you will need to type ```make``` to build all the examples. **List of Topics** * [What is Allreduce](#what-is-allreduce) -* [Common Usecase of Allreduce](#common-use-case) +* [Common Use Case](#common-use-case) +* [Structure of Rabit Program](#structure-of-rabit-program) +* [Fault Tolerance](#fault-tolerance) What is Allreduce ===== @@ -42,13 +44,118 @@ The ```rabit::GetRank()``` function return the rank of current process. Before the call the allreduce, process 0 contains array ```a = {0, 1, 2}```, while process 1 have array ```a = {1, 2, 3}```. After the call of Allreduce, the array contents in all processes are replaced by the reduction result (in this case, the maximum value in each position across all the processes). So after the -Allreduce call, the result will become ```a={1, 2, 3}```. - +Allreduce call, the result will become ```a = {1, 2, 3}```. +Rabit provides different reduction operators, for example, you can change ```op::Max``` to ```op::Sum```, +then the reduction operation will become the summation, and the result will become ```a = {1, 3, 5}```. You can also run example with different processes by setting -n to different values, to see the outcomming result. -Rabit provides different reduction operators, for example, you can change ```op::Max``` to ```op::Sum```, to change -the reduction method from maximum to summation. +Broadcast is another method provided by rabit besides Allreduce, this function allows one node to broadcast its +local data to all the other nodes. The following code in [broadcast.cc](broadcast.cc) broadcast a string from +node 0 to all other nodes. 
+```c++ +#include +using namespace rabit; +const int N = 3; +int main(int argc, char *argv[]) { + rabit::Init(argc, argv); + std::string s; + if (rabit::GetRank() == 0) s = "hello world"; + printf("@node[%d] before-broadcast: s=\"%s\"\n", + rabit::GetRank(), s.c_str()); + // broadcast s from node 0 to all other nodes + rabit::Broadcast(&s, 0); + printf("@node[%d] after-broadcast: s=\"%s\"\n", + rabit::GetRank(), s.c_str()); + rabit::Finalize(); + return 0; +} +``` +You can run the program by the following command, using three workers. +```bash +../tracker/rabit_demo.py -n 3 broadcast.rabit +``` +Besides string, rabit also allows broadcast of constant size array and vector. Common Use Case ===== +Many distributed machine learning algorithm involves dividing the data into each node, +compute statistics locally and aggregates them together. Such process is usually done repeatively in +many iterations before the algorithm converge. Allreduce naturally meets the need of such programs, +common use cases include: +* Aggregation of gradient values, which can be used in optimization methods such as L-BFGS. +* Aggregation of other statistics, which can be used in KMeans and Gaussian Mixture Model. +* Find the best split candidate and aggregation of split statistics, used for tree based models. + +The main purpose of Rabit is to provide reliable and portable library for distributed machine learning programs. +So that the program can be run reliably on different types of platforms. + +Structure of Rabit Program +===== +The following code illustrates the common structure of rabit program. This is an abstract example, +you can also refer to [kmeans.cc](../toolkit/kmeans.cc) for an example implementation of kmeans. + +```c++ +#include +int main(int argc, char *argv[]) { + ... 
+  rabit::Init(argc, argv);
+  // load the latest checkpointed model
+  int version = rabit::LoadCheckPoint(&model);
+  // initialize the model if it is the first version
+  if (version == 0) model.InitModel();
+  // the version number marks the iteration to resume
+  for (int iter = version; iter < max_iter; ++iter) {
+    // the model is up to date at this point
+    ...
+    // each iteration can contain multiple calls of allreduce/broadcast
+    rabit::Allreduce(&data[0], n);
+    ...
+    // checkpoint the model after one iteration finishes
+    rabit::CheckPoint(&model);
+  }
+  rabit::Finalize();
+  return 0;
+}
+```
+
+Besides the common Allreduce and Broadcast functions, there are two additional functions: ```LoadCheckPoint```
+and ```CheckPoint```. These two functions are used for fault-tolerance purposes.
+Common machine learning programs involve several iterations. In each iteration, we start from a model, do some calls
+to Allreduce or Broadcasts and update the model to a new one. The calling sequence in each iteration does not need to be the same.
+
+* When the nodes start from the beginning, LoadCheckPoint returns 0, and we can initialize the model.
+* ```CheckPoint``` saves the model after each iteration.
+  - Efficiency Note: the model is only kept in local memory and no save to disk is involved in CheckPoint.
+* When a node goes down and restarts, ```LoadCheckPoint``` will recover the latest saved model.
+* When a node goes down, the rest of the nodes will block in the call of Allreduce/Broadcast and help
+  the recovery of the failed nodes, until they catch up.
+
+Please also see the next section for an introduction to the fault tolerance procedure in rabit.
+
+Fault Tolerance
+=====
+This section introduces how fault tolerance works in rabit.
+We can use the following figure to show how rabit deals with failures.
+
+![](http://homes.cs.washington.edu/~tqchen/rabit/fig/fault-tol.png)
+
+The scenario is as follows:
+* Node 1 fails between the first and second call of Allreduce after the latest checkpoint.
+* Other nodes stay in the call of the second Allreduce to help node 1 recover.
+* When node 1 restarts, it will call ```LoadCheckPoint```, and get the latest checkpoint from one of the existing nodes.
+* Then node 1 can start from the latest checkpoint and continue running.
+* When node 1 calls the first Allreduce again, because the other nodes already know the result of the allreduce, node 1 can get the result from one of the nodes.
+* When node 1 reaches the second Allreduce, the other nodes find out that node 1 has caught up and they can continue the program normally.
+
+We can find that this fault tolerance model is based on a key property of Allreduce and Broadcast:
+all the nodes get the same result when calling Allreduce/Broadcast. Because of this property, we can have some nodes record the history,
+and when a node recovers, the result can be forwarded to the recovering node.
+
+The checkpoint is introduced so that we do not have to discard the history before the checkpoint, so that the iterative program can be more
+efficient. The strategy of rabit is different from the fail-restart strategy, where all the nodes restart from a checkpoint
+when any of the nodes fails. In rabit, the programs only block in the Allreduce call to help the recovery, and the checkpoint is only saved locally without
+touching the disk. This makes rabit programs more reliable and efficient.
+
+This is a conceptual introduction to the fault tolerant model of rabit. The actual implementation is more sophisticated,
+and can deal with more complicated cases such as multiple node failures and node failure during the recovery phase.
diff --git a/toolkit/Makefile b/toolkit/Makefile index 646558a74..3b74f9ba6 100644 --- a/toolkit/Makefile +++ b/toolkit/Makefile @@ -18,7 +18,7 @@ lib: libmpi: cd ..;make lib/librabit_mpi.a;cd - -kmeans.o: kmeans.cpp ../src/*.h +kmeans.o: kmeans.cc ../src/*.h # we can link against MPI version to get use MPI kmeans.rabit: kmeans.o lib diff --git a/toolkit/README.md b/toolkit/README.md index c88d931c9..5a3845465 100644 --- a/toolkit/README.md +++ b/toolkit/README.md @@ -1,3 +1,8 @@ Toolkit ==== This folder contains example toolkit developed using rabit + +KMeans +==== +* Kmeans taks in LIBSVM format +* You will need a dummy label field at beginning of all the lines to get KMeans diff --git a/toolkit/kmeans.cpp b/toolkit/kmeans.cc similarity index 100% rename from toolkit/kmeans.cpp rename to toolkit/kmeans.cc diff --git a/toolkit/toolkit_util.h b/toolkit/toolkit_util.h index 061d3e97b..d616ac0bf 100644 --- a/toolkit/toolkit_util.h +++ b/toolkit/toolkit_util.h @@ -43,7 +43,7 @@ struct SparseMat { feat_dim = 0; float label; bool init = true; char tmp[1024]; - while (fscanf(file, "%s", tmp) == 1) { + while (fscanf(fi, "%s", tmp) == 1) { Entry e; if (sscanf(tmp, "%u:%f", &e.findex, &e.fvalue) == 2) { data.push_back(e); From 31a3d22af433d19fc49113b55b462e13d33c2685 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 1 Jan 2015 05:42:38 -0800 Subject: [PATCH 161/531] add broadcast --- guide/broadcast.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 guide/broadcast.cc diff --git a/guide/broadcast.cc b/guide/broadcast.cc new file mode 100644 index 000000000..83dbe67fe --- /dev/null +++ b/guide/broadcast.cc @@ -0,0 +1,16 @@ +#include +using namespace rabit; +const int N = 3; +int main(int argc, char *argv[]) { + rabit::Init(argc, argv); + std::string s; + if (rabit::GetRank() == 0) s = "hello world"; + printf("@node[%d] before-broadcast: s=\"%s\"\n", + rabit::GetRank(), s.c_str()); + // broadcast s from node 0 to all other nodes + rabit::Broadcast(&s, 
0); + printf("@node[%d] after-broadcast: s=\"%s\"\n", + rabit::GetRank(), s.c_str()); + rabit::Finalize(); + return 0; +} From 2bfbbfb381372098d9c67a1190160ddf7cd312b9 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 1 Jan 2015 05:48:34 -0800 Subject: [PATCH 162/531] checkin API doc --- .gitignore | 1 + doc/Doxyfile | 287 +++++++++++++++++++++++++++++++++++ doc/mkdoc.sh | 4 + include/rabit.h | 12 +- include/rabit_serializable.h | 2 +- 5 files changed, 299 insertions(+), 7 deletions(-) create mode 100644 doc/Doxyfile create mode 100755 doc/mkdoc.sh diff --git a/.gitignore b/.gitignore index 3bd3d6a61..6a2c32d90 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ *.exe *.txt *tmp* +doc \ No newline at end of file diff --git a/doc/Doxyfile b/doc/Doxyfile new file mode 100644 index 000000000..2e1af0286 --- /dev/null +++ b/doc/Doxyfile @@ -0,0 +1,287 @@ +# Doxyfile 1.7.6.1 + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = "rabit" +PROJECT_NUMBER = +PROJECT_BRIEF = +PROJECT_LOGO = +OUTPUT_DIRECTORY = ../doc +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 8 +ALIASES = +TCL_SUBST = +OPTIMIZE_OUTPUT_FOR_C = YES +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +EXTENSION_MAPPING = +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES +DISTRIBUTE_GROUP_DOC = NO +SUBGROUPING = YES +INLINE_GROUPED_CLASSES = NO +INLINE_SIMPLE_STRUCTS = NO +TYPEDEF_HIDES_STRUCT = NO +SYMBOL_CACHE_SIZE = 
0 +LOOKUP_CACHE_SIZE = 0 +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = NO +EXTRACT_ANON_NSPACES = NO +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = YES +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = YES +FORCE_LOCAL_INCLUDES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_MEMBERS_CTORS_1ST = NO +SORT_GROUP_NAMES = NO +SORT_BY_SCOPE_NAME = NO +STRICT_PROTO_MATCHING = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +SHOW_DIRECTORIES = NO +SHOW_FILES = YES +SHOW_NAMESPACES = YES +FILE_VERSION_FILTER = +LAYOUT_FILE = +CITE_BIB_FILES = +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = YES +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = +INPUT_ENCODING = UTF-8 +FILE_PATTERNS = +RECURSIVE = NO +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = *-inl.hpp +EXCLUDE_SYMBOLS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +FILTER_SOURCE_PATTERNS = 
+#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +REFERENCES_LINK_SOURCE = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = YES +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_EXTRA_FILES = +HTML_COLORSTYLE_HUE = 220 +HTML_COLORSTYLE_SAT = 100 +HTML_COLORSTYLE_GAMMA = 80 +HTML_TIMESTAMP = YES +HTML_ALIGN_MEMBERS = YES +HTML_DYNAMIC_SECTIONS = NO +GENERATE_DOCSET = NO +DOCSET_FEEDNAME = "Doxygen generated docs" +DOCSET_BUNDLE_ID = org.doxygen.Project +DOCSET_PUBLISHER_ID = org.doxygen.Publisher +DOCSET_PUBLISHER_NAME = Publisher +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +CHM_INDEX_ENCODING = +BINARY_TOC = NO +TOC_EXPAND = NO +GENERATE_QHP = NO +QCH_FILE = +QHP_NAMESPACE = org.doxygen.Project +QHP_VIRTUAL_FOLDER = doc +QHP_CUST_FILTER_NAME = +QHP_CUST_FILTER_ATTRS = +QHP_SECT_FILTER_ATTRS = +QHG_LOCATION = +GENERATE_ECLIPSEHELP = NO +ECLIPSE_DOC_ID = org.doxygen.Project +DISABLE_INDEX = NO +GENERATE_TREEVIEW = NO +ENUM_VALUES_PER_LINE = 4 +USE_INLINE_TREES = NO +TREEVIEW_WIDTH = 250 +EXT_LINKS_IN_WINDOW = NO +FORMULA_FONTSIZE = 10 +FORMULA_TRANSPARENT = YES +USE_MATHJAX = NO +MATHJAX_RELPATH = 
http://www.mathjax.org/mathjax +MATHJAX_EXTENSIONS = +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = YES +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4 +EXTRA_PACKAGES = +LATEX_HEADER = +LATEX_FOOTER = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +LATEX_SOURCE_CODE = NO +LATEX_BIB_STYLE = plain +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = NO +XML_OUTPUT = xml +XML_SCHEMA = +XML_DTD = +XML_PROGRAMLISTING = YES +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- 
+GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = NO +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = YES +MSCGEN_PATH = +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = NO +DOT_NUM_THREADS = 0 +DOT_FONTNAME = Helvetica +DOT_FONTSIZE = 10 +DOT_FONTPATH = +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +CALLER_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +INTERACTIVE_SVG = NO +DOT_PATH = +DOTFILE_DIRS = +MSCFILE_DIRS = +DOT_GRAPH_MAX_NODES = 50 +MAX_DOT_GRAPH_DEPTH = 0 +DOT_TRANSPARENT = NO +DOT_MULTI_TARGETS = YES +GENERATE_LEGEND = YES +DOT_CLEANUP = YES diff --git a/doc/mkdoc.sh b/doc/mkdoc.sh new file mode 100755 index 000000000..4bc0284c3 --- /dev/null +++ b/doc/mkdoc.sh @@ -0,0 +1,4 @@ +#!/bin/bash +cd ../include +doxygen ../doc/Doxyfile +cd ../doc diff --git a/include/rabit.h b/include/rabit.h index 5b2db3098..7940b8616 100644 --- a/include/rabit.h +++ b/include/rabit.h @@ -60,7 +60,7 @@ 
inline std::string GetProcessorName(void); * \brief print the msg to the tracker, * this function can be used to communicate the information of the progress to * the user who monitors the tracker - * \param msg, the message to be printed + * \param msg the message to be printed */ inline void TrackerPrint(const std::string &msg); #ifndef RABIT_STRICT_CXX98_ @@ -108,8 +108,8 @@ inline void Broadcast(std::string *sendrecv_data, int root); * ... * \param sendrecvbuf buffer for both sending and recving data * \param count number of elements to be reduced - * \param prepare_func Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) - * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. + * \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) + * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called * \param prepare_arg argument used to passed into the lazy preprocessing function * \tparam OP see namespace op, reduce operator @@ -136,7 +136,7 @@ inline void Allreduce(DType *sendrecvbuf, size_t count, * ... * \param sendrecvbuf buffer for both sending and recving data * \param count number of elements to be reduced - * \param prepare_func Lazy lambda preprocessing function, prepare_fun() will be invoked + * \param prepare_fun Lazy lambda preprocessing function, prepare_fun() will be invoked * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. 
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called * \tparam OP see namespace op, reduce operator @@ -211,7 +211,7 @@ class Reducer { * \brief customized in-place all reduce operation * \param sendrecvbuf the in place send-recv buffer * \param count number of elements to be reduced - * \param prepare_func Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) + * \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called * \param prepare_arg argument used to passed into the lazy preprocessing function @@ -253,7 +253,7 @@ class SerializeReducer { * \param max_nbyte maximum amount of memory needed to serialize each object * this includes budget limit for intermediate and final result * \param count number of elements to be reduced - * \param prepare_func Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) + * \param prepare_fun Lazy preprocessing function, if it is not NULL, prepare_fun(prepare_arg) * will be called by the function before performing Allreduce, to intialize the data in sendrecvbuf_. * If the result of Allreduce can be recovered directly, then prepare_func will NOT be called * \param prepare_arg argument used to passed into the lazy preprocessing function diff --git a/include/rabit_serializable.h b/include/rabit_serializable.h index 0b2ccf3cb..bf90593c8 100644 --- a/include/rabit_serializable.h +++ b/include/rabit_serializable.h @@ -1,6 +1,6 @@ /*! 
* Copyright (c) 2014 by Contributors - * \file serializable.h + * \file rabit_serializable.h * \brief defines serializable interface of rabit * \author Tianqi Chen */ From 61f21859d943ba413a35f9239b86dc982ce309ab Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 1 Jan 2015 05:57:46 -0800 Subject: [PATCH 163/531] add api --- README.md | 4 +++- guide/README.md | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0517473a5..2834bfd22 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementations of distributed machine learning programs, many of which fall naturally under the Allreduce abstraction. -* See the [package interface file](include/rabit.h) +* [Guide Tutorial of Rabit](guide) +* [API Documentation](http://home.cs.washington.edu/~tqchen/rabit/doc) +* You can also directly read the [interface header](include/rabit.h) Features ==== diff --git a/guide/README.md b/guide/README.md index 311681aac..0166521f7 100644 --- a/guide/README.md +++ b/guide/README.md @@ -3,12 +3,16 @@ Tutorial of Rabit This is an tutorial of rabit, a ***Reliable Allreduce and Broadcast interface***. To run the examples locally, you will need to type ```make``` to build all the examples. +Please also refer to the [API Documentation](http://home.cs.washington.edu/~tqchen/rabit/doc) + + **List of Topics** * [What is Allreduce](#what-is-allreduce) * [Common Use Case](#common-use-case) * [Structure of Rabit Program](#structure-of-rabit-program) * [Fault Tolerance](#fault-tolerance) + What is Allreduce ===== The main method provided by rabit are Allreduce and Broadcast. 
Allreduce performs reduction across different computation nodes, From 08ca3b084982504ffbbb01e59c156de53e671a58 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 1 Jan 2015 06:02:32 -0800 Subject: [PATCH 164/531] add more links --- guide/README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/guide/README.md b/guide/README.md index 0166521f7..9d7fa2729 100644 --- a/guide/README.md +++ b/guide/README.md @@ -11,7 +11,9 @@ Please also refer to the [API Documentation](http://home.cs.washington.edu/~tqch * [Common Use Case](#common-use-case) * [Structure of Rabit Program](#structure-of-rabit-program) * [Fault Tolerance](#fault-tolerance) - +* [Running Rabit Jobs](#running-rabit-jobs) + - [Running Rabit on Hadoop](#running-rabit-on-hadoop) + - [Running Rabit using MPI](#running-rabit-using-mpi) What is Allreduce ===== @@ -163,3 +165,15 @@ touching the disk. This makes rabit program more reliable and efficient. This is an conceptual introduction to the fault tolerant model of rabit. The actual implementation is more sophiscated, and can deal with more complicated cases such as multiple nodes failure and node failure during recovery phase. 
+ +Running Rabit Jobs +==== +TODO + +Running Rabit on Hadoop +==== +TODO + +Running Rabit using MPI +==== +TODO From eb2b086b65d1858d9fe290fd1a79e84997ec10d6 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 1 Jan 2015 06:04:02 -0800 Subject: [PATCH 165/531] ok --- guide/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/guide/README.md b/guide/README.md index 9d7fa2729..3666f3d92 100644 --- a/guide/README.md +++ b/guide/README.md @@ -168,12 +168,13 @@ and can deal with more complicated cases such as multiple nodes failure and node Running Rabit Jobs ==== +* To run demo locally, use [rabit_demo.py](../tracker/rabit_demo.py) TODO Running Rabit on Hadoop ==== -TODO +TODO, use [rabit_hadoop.py](../tracker/rabit_hadoop.py) Running Rabit using MPI ==== -TODO +TODO, use [rabit_mpi.py](../tracker/rabit_mpi.py) or directly use mpirun if compiled with MPI backend. From d10a435d6477cd765e3c7ef8453f128802b5c6d5 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 1 Jan 2015 06:06:02 -0800 Subject: [PATCH 166/531] correct --- README.md | 2 +- guide/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2834bfd22..45e6bd1b4 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementations of distributed machine learning programs, many of which fall naturally under the Allreduce abstraction. 
* [Guide Tutorial of Rabit](guide) -* [API Documentation](http://home.cs.washington.edu/~tqchen/rabit/doc) +* [API Documentation](http://homes.cs.washington.edu/~tqchen/rabit/doc) * You can also directly read the [interface header](include/rabit.h) Features diff --git a/guide/README.md b/guide/README.md index 3666f3d92..c019440eb 100644 --- a/guide/README.md +++ b/guide/README.md @@ -3,7 +3,7 @@ Tutorial of Rabit This is an tutorial of rabit, a ***Reliable Allreduce and Broadcast interface***. To run the examples locally, you will need to type ```make``` to build all the examples. -Please also refer to the [API Documentation](http://home.cs.washington.edu/~tqchen/rabit/doc) +Please also refer to the [API Documentation](http://homes.cs.washington.edu/~tqchen/rabit/doc) **List of Topics** From be355c1e605899508fe2fd8ec209c7bb61725522 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 1 Jan 2015 06:06:55 -0800 Subject: [PATCH 167/531] minor --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 45e6bd1b4..32fdc6fd2 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementations of distributed machine learning programs, many of which fall naturally under the Allreduce abstraction. 
-* [Guide Tutorial of Rabit](guide) +* [Tutorial of Rabit](guide) * [API Documentation](http://homes.cs.washington.edu/~tqchen/rabit/doc) * You can also directly read the [interface header](include/rabit.h) From 1b4921977f164209a1a0fea07668d365510c499e Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 3 Jan 2015 05:20:18 -0800 Subject: [PATCH 168/531] update doc --- Makefile | 8 +- guide/README.md | 112 +++++++++++++++++--- include/rabit.h | 27 ++++- include/rabit_serializable.h | 2 +- lib/README | 1 - lib/README.md | 15 +++ test/config.h | 196 ----------------------------------- test/mock.h | 121 --------------------- tracker/rabit_demo.py | 4 +- tracker/rabit_hadoop.py | 4 +- tracker/rabit_mpi.py | 8 +- 11 files changed, 149 insertions(+), 349 deletions(-) delete mode 100644 lib/README create mode 100644 lib/README.md delete mode 100644 test/config.h delete mode 100644 test/mock.h diff --git a/Makefile b/Makefile index 64cba30fa..27c2b1915 100644 --- a/Makefile +++ b/Makefile @@ -4,15 +4,17 @@ export MPICXX = mpicxx export LDFLAGS= export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -Iinclude -BPATH=lib +# build path +BPATH=. 
# objectives that makes up rabit library MPIOBJ= $(BPATH)/engine_mpi.o OBJ= $(BPATH)/allreduce_base.o $(BPATH)/allreduce_robust.o $(BPATH)/engine.o $(BPATH)/engine_empty.o $(BPATH)/engine_mock.o ALIB= lib/librabit.a lib/librabit_mpi.a lib/librabit_empty.a lib/librabit_mock.a HEADERS=src/*.h include/*.h include/rabit/*.h -.PHONY: clean all +.PHONY: clean all install mpi -all: $(ALIB) +all: lib/librabit.a lib/librabit_mock.a +mpi: lib/librabit_mpi.a $(BPATH)/allreduce_base.o: src/allreduce_base.cc $(HEADERS) $(BPATH)/engine.o: src/engine.cc $(HEADERS) diff --git a/guide/README.md b/guide/README.md index c019440eb..2ea6dccc8 100644 --- a/guide/README.md +++ b/guide/README.md @@ -10,10 +10,12 @@ Please also refer to the [API Documentation](http://homes.cs.washington.edu/~tqc * [What is Allreduce](#what-is-allreduce) * [Common Use Case](#common-use-case) * [Structure of Rabit Program](#structure-of-rabit-program) -* [Fault Tolerance](#fault-tolerance) +* [Compile Programs with Rabit](#compile-programs-with-rabit) * [Running Rabit Jobs](#running-rabit-jobs) - [Running Rabit on Hadoop](#running-rabit-on-hadoop) - [Running Rabit using MPI](#running-rabit-using-mpi) + - [Customize Tracker Script](#customize-tracker-script) +* [Fault Tolerance](#fault-tolerance) What is Allreduce ===== @@ -137,7 +139,101 @@ to Allreduce or Broadcasts and update the model to a new one. The calling sequen * When a node goes down, the rest of the node will block in the call of Allreduce/Broadcast and helps the recovery of the failure nodes, util it catches up. -Please also see the next section for introduction of fault tolerance procedure in rabit. +Please also see the section of [fault tolerance procedure](#fault-tolerance) in rabit to understand the recovery procedure under going in rabit + +Compile Programs with Rabit +==== +Rabit is a portable library, to use it, you only need to include the rabit header file. 
+* You will need to add path to [../include](../include) to the header search path of compiler + - Solution 1: add ```-I/path/to/rabit/include``` to the compiler flag in gcc or clang + - Solution 2: add the path to enviroment variable CPLUS_INCLUDE_PATH +* You will need to add path to [../lib](../lib) to the library search path of compiler + - Solution 1: add ```-L/path/to/rabit/lib``` to the linker flag + - Solution 2: add the path to enviroment variable LIBRARY_PATH AND LD_LIBRARY_PATH +* Link against lib/rabit.a + - Add ```-lrabit``` to linker flag + +The procedure above allows you to compile a program with rabit. The following two sections are additional +advanced options you can take to link against different backend other than the normal one. + +#### Link against MPI Allreduce +You can link against ```rabit_mpi.a``` instead to use MPI Allreduce, however, the resulting program is backed by MPI and +is not fault tolerant anymore. +* Simply change linker flag from ```-lrabit``` to ```-lrabit_mpi``` +* The final linking needs to be done by mpi wrapper compiler ```mpicxx``` + +#### Link against Mock Test Rabit Library +If you want to mock test the program to see the behavior of the code when some nodes goes down. You can link against ```rabit_mock.a``` . 
+* Simply change linker flag from ```-lrabit``` to ```-lrabit_mock``` + +The resulting rabit program can take in additional arguments in format of +``` +mock=rank,version,seq,ndeath +``` + +The four integers specifies an event that will cause the program to suicide(exit with -2) +* rank specifies the rank of the node +* version specifies the current version(iteration) of the model +* seq specifies the sequence number of Allreduce/Broadcast call since last checkpoint +* ndeath specifies how many times this node died already + +For example, consider the following script in the test case +```bash +../tracker/rabit_demo.py -n 10 test_model_recover 10000\ + mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 +``` +* The first mock will cause node 0 to exit when calling second Allreduce/Broadcast (seq = 1) in iteration 0 +* The second mock will cause node 1 to exit when calling second Allreduce/Broadcast (seq = 1) in iteration 1 +* The second mock will cause node 0 to exit again when calling second Allreduce/Broadcast (seq = 1) in iteration 1 + - Note that ndeath = 1 means this will happen only if node 0 died once, which is our case + +Running Rabit Jobs +==== +Rabit is a portable library that can run on multiple platforms. 
+ +#### Running Rabit Locally +* You can use [../tracker/rabit_demo.py](../tracker/rabit_demo.py) to start n process locally +* This script will restart the program when it exits with -2, so it can be used for [mock test](#link-against-mock-test-library) + +#### Running Rabit on Hadoop +* You can use [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) to run rabit program on hadoop +* This will start n rabit program as mapper of MapReduce +* Each program can read its part of data from stdin +* Yarn is highly recommended, since Yarn allows specifying ncpu and memory of each mapper + - This allows multi-threading programs in each node, which can be more efficient + - A good possible practice is OpenMP-rabit hybrid code + +#### Running Rabit on Yarn +* To Be modified from [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) + +#### Running Rabit using MPI +* You can submit rabit programs to MPI cluster using [../tracker/rabit_mpi.py](../tracker/rabit_mpi.py). +* If you linked your code against librabit_mpi.a, then you can directly use mpirun to submit the job + +#### Customize Tracker Script +You can also modify the tracker script to allow rabit run on other platforms. To do so, refer to the existing +tracker script such as [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) and [../tracker/rabit_mpi.py](../tracker/rabit_mpi.py) + +You will need to implement a platform dependent submission function with the following definition +```python +def fun_submit(nslave, slave_args): + """ + customized submit script, that submit nslave jobs, + each must contain args as parameter + note this can be a lambda closure + Parameters + nslave number of slave process to start up + worker_args tracker information which must be passed to the arguments + this usually includes the parameters of master_uri and port etc. + """ +``` +The submission function should start nslave process in the platform, and append slave_args to the end of other arguments. 
+Then we can simply call ```tracker.submit``` with fun_submit to submit jobs in the target platform + +Note that the current rabit tracker do not restart a worker when it dies, the job of fail-restart thus lies on the platform itself or we should write +fail-restart logic in the customization script. +* Fail-restart is usually provided by most platforms. +* For example, mapreduce will restart a mapper when it fails Fault Tolerance ===== @@ -166,15 +262,3 @@ touching the disk. This makes rabit program more reliable and efficient. This is an conceptual introduction to the fault tolerant model of rabit. The actual implementation is more sophiscated, and can deal with more complicated cases such as multiple nodes failure and node failure during recovery phase. -Running Rabit Jobs -==== -* To run demo locally, use [rabit_demo.py](../tracker/rabit_demo.py) -TODO - -Running Rabit on Hadoop -==== -TODO, use [rabit_hadoop.py](../tracker/rabit_hadoop.py) - -Running Rabit using MPI -==== -TODO, use [rabit_mpi.py](../tracker/rabit_mpi.py) or directly use mpirun if compiled with MPI backend. diff --git a/include/rabit.h b/include/rabit.h index 7940b8616..d3620d8d0 100644 --- a/include/rabit.h +++ b/include/rabit.h @@ -25,15 +25,29 @@ /*! \brief namespace of rabit */ namespace rabit { -/*! \brief namespace of operator */ +/*! + * \brief namespace of reduction operators + */ namespace op { -/*! \brief maximum value */ +/*! + * \class rabit::op::Max + * \brief maximum reduction operator + */ struct Max; -/*! \brief minimum value */ +/*! + * \class rabit::op::Min + * \brief minimum reduction operator + */ struct Min; -/*! \brief perform sum */ +/*! + * \class rabit::op::Sum + * \brief sum reduction operator + */ struct Sum; -/*! \brief perform bitwise OR */ +/*! + * \class rabit::op::BitOR + * \brief bitwise or reduction operator + */ struct BitOR; } // namespace op /*! @@ -75,6 +89,7 @@ inline void TrackerPrintf(const char *fmt, ...); #endif /*! 
* \brief broadcast an memory region to all others from root + * * Example: int a = 1; Broadcast(&a, sizeof(a), root); * \param sendrecv_data the pointer to send or recive buffer, * \param size the size of the data @@ -101,6 +116,7 @@ inline void Broadcast(std::string *sendrecv_data, int root); /*! * \brief perform in-place allreduce, on sendrecvbuf * this function is NOT thread-safe + * * Example Usage: the following code gives sum of the result * vector data(10); * ... @@ -125,6 +141,7 @@ inline void Allreduce(DType *sendrecvbuf, size_t count, /*! * \brief perform in-place allreduce, on sendrecvbuf * with a prepare function specified by lambda function + * * Example Usage: the following code gives sum of the result * vector data(10); * ... diff --git a/include/rabit_serializable.h b/include/rabit_serializable.h index bf90593c8..f6c1423c7 100644 --- a/include/rabit_serializable.h +++ b/include/rabit_serializable.h @@ -88,7 +88,7 @@ class IStream { } }; -/*! \brief interface of se*/ +/*! \brief interface of serializable objects */ class ISerializable { public: /*! \brief load the model from file */ diff --git a/lib/README b/lib/README deleted file mode 100644 index c734c76ab..000000000 --- a/lib/README +++ /dev/null @@ -1 +0,0 @@ -This folder holds the library file generated by the compiler \ No newline at end of file diff --git a/lib/README.md b/lib/README.md new file mode 100644 index 000000000..b6a5aa8b2 --- /dev/null +++ b/lib/README.md @@ -0,0 +1,15 @@ +Rabit Library +===== +This folder holds the library file generated by the compiler. To generate the library file, type ```make``` in the project root folder. 
If you want mpi compatible library, type ```make mpi``` + +***List of Files*** +* rabit.a The rabit package library + - Normally you need to link with this one +* rabit_mock.a The rabit package library with mock test + - This library allows additional mock-test +* rabit_mpi.a The MPI backed library + - Link against this library makes the program use MPI Allreduce + - This library is not fault-tolerant +* rabit_empty.a Dummy package implementation + - This is an empty library that does not provide anything + - Only introduced to minimize code dependency for projects that only need single machine code diff --git a/test/config.h b/test/config.h deleted file mode 100644 index 467e8f63e..000000000 --- a/test/config.h +++ /dev/null @@ -1,196 +0,0 @@ -#ifndef RABIT_UTILS_CONFIG_H_ -#define RABIT_UTILS_CONFIG_H_ -/*! - * \file config.h - * \brief helper class to load in configures from file - * \author Tianqi Chen - */ -#include -#include -#include -#include -#include -#include "./rabit/utils.h" - -namespace rabit { -namespace utils { -/*! - * \brief base implementation of config reader - */ -class ConfigReaderBase { - public: - /*! - * \brief get current name, called after Next returns true - * \return current parameter name - */ - inline const char *name(void) const { - return s_name; - } - /*! - * \brief get current value, called after Next returns true - * \return current parameter value - */ - inline const char *val(void) const { - return s_val; - } - /*! - * \brief move iterator to next position - * \return true if there is value in next position - */ - inline bool Next(void) { - while (!this->IsEnd()) { - GetNextToken(s_name); - if (s_name[0] == '=') return false; - if (GetNextToken( s_buf ) || s_buf[0] != '=') return false; - if (GetNextToken( s_val ) || s_val[0] == '=') return false; - return true; - } - return false; - } - // called before usage - inline void Init(void) { - ch_buf = this->GetChar(); - } - - protected: - /*! 
- * \brief to be implemented by subclass, - * get next token, return EOF if end of file - */ - virtual char GetChar(void) = 0; - /*! \brief to be implemented by child, check if end of stream */ - virtual bool IsEnd(void) = 0; - - private: - char ch_buf; - char s_name[100000], s_val[100000], s_buf[100000]; - - inline void SkipLine(void) { - do { - ch_buf = this->GetChar(); - } while (ch_buf != EOF && ch_buf != '\n' && ch_buf != '\r'); - } - - inline void ParseStr(char tok[]) { - int i = 0; - while ((ch_buf = this->GetChar()) != EOF) { - switch (ch_buf) { - case '\\': tok[i++] = this->GetChar(); break; - case '\"': tok[i++] = '\0'; return; - case '\r': - case '\n': Error("ConfigReader: unterminated string"); - default: tok[i++] = ch_buf; - } - } - Error("ConfigReader: unterminated string"); - } - inline void ParseStrML(char tok[]) { - int i = 0; - while ((ch_buf = this->GetChar()) != EOF) { - switch (ch_buf) { - case '\\': tok[i++] = this->GetChar(); break; - case '\'': tok[i++] = '\0'; return; - default: tok[i++] = ch_buf; - } - } - Error("unterminated string"); - } - // return newline - inline bool GetNextToken(char tok[]) { - int i = 0; - bool new_line = false; - while (ch_buf != EOF) { - switch (ch_buf) { - case '#' : SkipLine(); new_line = true; break; - case '\"': - if (i == 0) { - ParseStr(tok); ch_buf = this->GetChar(); return new_line; - } else { - Error("ConfigReader: token followed directly by string"); - } - case '\'': - if (i == 0) { - ParseStrML( tok ); ch_buf = this->GetChar(); return new_line; - } else { - Error("ConfigReader: token followed directly by string"); - } - case '=': - if (i == 0) { - ch_buf = this->GetChar(); - tok[0] = '='; - tok[1] = '\0'; - } else { - tok[i] = '\0'; - } - return new_line; - case '\r': - case '\n': - if (i == 0) new_line = true; - case '\t': - case ' ' : - ch_buf = this->GetChar(); - if (i > 0) { - tok[i] = '\0'; - return new_line; - } - break; - default: - tok[i++] = ch_buf; - ch_buf = this->GetChar(); - break; - } - } 
- return true; - } -}; -/*! - * \brief an iterator use stream base, allows use all types of istream - */ -class ConfigStreamReader: public ConfigReaderBase { - public: - /*! - * \brief constructor - * \param istream input stream - */ - explicit ConfigStreamReader(std::istream &fin) : fin(fin) {} - - protected: - virtual char GetChar(void) { - return fin.get(); - } - /*! \brief to be implemented by child, check if end of stream */ - virtual bool IsEnd(void) { - return fin.eof(); - } - - private: - std::istream &fin; -}; - -/*! - * \brief an iterator that iterates over a configure file and gets the configures - */ -class ConfigIterator: public ConfigStreamReader { - public: - /*! - * \brief constructor - * \param fname name of configure file - */ - explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi) { - fi.open(fname); - if (fi.fail()) { - utils::Error("cannot open file %s", fname); - } - ConfigReaderBase::Init(); - } - /*! \brief destructor */ - ~ConfigIterator(void) { - fi.close(); - } - - private: - std::ifstream fi; -}; -} // namespace utils -} // namespace rabit -#endif // RABIT_UTILS_CONFIG_H_ diff --git a/test/mock.h b/test/mock.h deleted file mode 100644 index 17e5b75c9..000000000 --- a/test/mock.h +++ /dev/null @@ -1,121 +0,0 @@ -#ifndef RABIT_MOCK_H -#define RABIT_MOCK_H -/*! - * \file mock.h - * \brief This file defines a mock object to test the system - * \author Ignacio Cano - */ -#include "./rabit.h" -#include "./config.h" -#include -#include -#include - -struct MockException { -}; - -namespace rabit { -/*! 
\brief namespace of mock */ -namespace test { - -class Mock { - - -public: - - explicit Mock(const int& rank, char *config, char* round_dir) : rank(rank) { - Init(config, round_dir); - } - - template - inline void Allreduce(float *sendrecvbuf, size_t count) { - utils::Assert(verify(allReduce), "[%d] error when calling allReduce", rank); - rabit::Allreduce(sendrecvbuf, count); - } - -inline int LoadCheckPoint(ISerializable *global_model, - ISerializable *local_model) { - utils::Assert(verify(loadCheckpoint), "[%d] error when loading checkpoint", rank); - return rabit::LoadCheckPoint(global_model, local_model); - } - - inline void CheckPoint(const ISerializable *global_model, - const ISerializable *local_model) { - utils::Assert(verify(checkpoint), "[%d] error when checkpointing", rank); - rabit::CheckPoint(global_model, local_model); - } - - inline void Broadcast(std::string *sendrecv_data, int root) { - utils::Assert(verify(broadcast), "[%d] error when broadcasting", rank); - rabit::Broadcast(sendrecv_data, root); - - } - -private: - - inline void Init(char* config, char* round_dir) { - std::stringstream ss; - ss << round_dir << "node" << rank << ".round"; - const char* round_file = ss.str().c_str(); - std::ifstream ifs(round_file); - int current_round = 1; - if (!ifs.good()) { - // file does not exists, it's the first time, so save the current round to 1 - std::ofstream ofs(round_file); - ofs << current_round; - ofs.close(); - } else { - // file does exists, read the previous round, increment by one, and save it back - ifs >> current_round; - current_round++; - ifs.close(); - std::ofstream ofs(round_file); - ofs << current_round; - ofs.close(); - } - printf("[%d] in round %d\n", rank, current_round); - utils::ConfigIterator itr(config); - while (itr.Next()) { - char round[4], node_rank[4]; - sscanf(itr.name(), "%[^_]_%s", round, node_rank); - int i_node_rank = atoi(node_rank); - // if it's something for me - if (i_node_rank == rank) { - int i_round = atoi(round); 
- // in my current round - if (i_round == current_round) { - printf("[%d] round %d, value %s\n", rank, i_round, itr.val()); - if (strcmp("allreduce", itr.val())) record(allReduce); - else if (strcmp("broadcast", itr.val())) record(broadcast); - else if (strcmp("loadcheckpoint", itr.val())) record(loadCheckpoint); - else if (strcmp("checkpoint", itr.val())) record(checkpoint); - } - } - } - } - - inline void record(std::map& m) { - m[rank] = false; - } - - inline bool verify(std::map& m) { - bool result = true; - if (m.find(rank) != m.end()) { - result = m[rank]; - } - return result; - } - - int rank; - std::map allReduce; - std::map broadcast; - std::map loadCheckpoint; - std::map checkpoint; - - -}; - -} // namespace test -} // namespace rabit - -#endif // RABIT_MOCK_H diff --git a/tracker/rabit_demo.py b/tracker/rabit_demo.py index aeeb6e9a3..ba14554ab 100755 --- a/tracker/rabit_demo.py +++ b/tracker/rabit_demo.py @@ -37,7 +37,7 @@ def exec_cmd(cmd, taskid): # Note: this submit script is only used for demo purpose # submission script using pyhton multi-threading # -def mthread_submit(nslave, slave_args): +def mthread_submit(nslave, worker_args): """ customized submit script, that submit nslave jobs, each must contain args as parameter note this can be a lambda function containing additional parameters in input @@ -48,7 +48,7 @@ def mthread_submit(nslave, slave_args): """ procs = {} for i in range(nslave): - procs[i] = Thread(target = exec_cmd, args = (args.command + slave_args, i)) + procs[i] = Thread(target = exec_cmd, args = (args.command + worker_args, i)) procs[i].start() for i in range(nslave): procs[i].join() diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index d25fae7c0..59866d55a 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -65,11 +65,11 @@ args = parser.parse_args() if args.jobname is None: args.jobname = ('Rabit(nworker=%d):' % args.nworker) + args.command[0].split('/')[-1]; -def hadoop_streaming(nworker, 
slave_args): +def hadoop_streaming(nworker, worker_args): cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) cmd += ' -D mapred.job.name=%d' % (a) cmd += ' -input %s -output %s' % (args.input, args.output) - cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) + cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + worker_args)) fset = set() if args.auto_file_cache: for f in args.command: diff --git a/tracker/rabit_mpi.py b/tracker/rabit_mpi.py index 604ed3bf7..599a9a7c5 100755 --- a/tracker/rabit_mpi.py +++ b/tracker/rabit_mpi.py @@ -22,7 +22,7 @@ args = parser.parse_args() # # submission script using MPI # -def mpi_submit(nslave, slave_args): +def mpi_submit(nslave, worker_args): """ customized submit script, that submit nslave jobs, each must contain args as parameter note this can be a lambda function containing additional parameters in input @@ -31,11 +31,11 @@ def mpi_submit(nslave, slave_args): args arguments to launch each job this usually includes the parameters of master_uri and parameters passed into submit """ - sargs = ' '.join(args.command + slave_args) + sargs = ' '.join(args.command + worker_args) if args.hostfile is None: - cmd = ' '.join(['mpirun -n %d' % (nslave)] + args.command + slave_args) + cmd = ' '.join(['mpirun -n %d' % (nslave)] + args.command + worker_args) else: - ' '.join(['mpirun -n %d --hostfile %s' % (nslave, args.hostfile)] + args.command + slave_args) + ' '.join(['mpirun -n %d --hostfile %s' % (nslave, args.hostfile)] + args.command + worker_args) print cmd subprocess.check_call(cmd, shell = True) From 697a01bfb4dcc8d747a5bca0c83dfed4206ef6e8 Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sat, 10 Jan 2015 10:54:12 +0800 Subject: [PATCH 169/531] har -> jar --- tracker/rabit_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 59866d55a..bd0abc0c4 
100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -21,7 +21,7 @@ if hadoop_home != None: hadoop_binary = hadoop_home + '/bin/hadoop' assert os.path.exists(hadoop_binary), "HADDOP_HOME does not contain the hadoop binary" if hadoop_streaming_jar == None: - hadoop_streaming_jar = hadoop_home + '/lib/hadoop-streaming.har' + hadoop_streaming_jar = hadoop_home + '/lib/hadoop-streaming.jar' assert os.path.exists(hadoop_streaming_jar), "HADDOP_HOME does not contain the haddop streaming jar" if hadoop_binary == None or hadoop_streaming_jar == None: From 7f5cb3aa0e64c88f05f10c26ae346e06b043280b Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sat, 10 Jan 2015 10:58:53 +0800 Subject: [PATCH 170/531] modify hs --- tracker/rabit_hadoop.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index bd0abc0c4..01d242e31 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -25,7 +25,7 @@ if hadoop_home != None: assert os.path.exists(hadoop_streaming_jar), "HADDOP_HOME does not contain the haddop streaming jar" if hadoop_binary == None or hadoop_streaming_jar == None: - print 'Warning: Cannot auto-detect path to hadoop and streaming jar, need to set them via arguments -hs and -hb' + print 'Warning: Cannot auto-detect path to hadoop and hadoop-streaming jar, need to set them via arguments -hs and -hb' print '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14' parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using Hadoop Streaming') @@ -53,10 +53,10 @@ else: help="path-to-hadoop binary folder") if hadoop_streaming_jar == None: - parser.add_argument('-jar', '--hadoop_streaming_jar', required = True, + parser.add_argument('-hs', '--hadoop_streaming_jar', required = True, help='path-to hadoop streamimg jar file') else: - parser.add_argument('-jar', '--hadoop_streaming_jar', default = 
hadoop_streaming_jar, + parser.add_argument('-hs', '--hadoop_streaming_jar', default = hadoop_streaming_jar, help='path-to hadoop streamimg jar file') parser.add_argument('command', nargs='+', help = 'command for rabit program') From d986693fbd4c021bf582d931510c3a9523751738 Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sun, 11 Jan 2015 00:14:37 +0800 Subject: [PATCH 171/531] fix bugs --- tracker/rabit_hadoop.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 01d242e31..30d21748f 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -12,7 +12,7 @@ import rabit_tracker as tracker #!!! Set path to hadoop and hadoop streaming jar here hadoop_binary = 'hadoop' -hadoop_streaming_jar = None +hadoop_streaming_jar = None # code hadoop_home = os.getenv('HADOOP_HOME') @@ -67,7 +67,7 @@ if args.jobname is None: def hadoop_streaming(nworker, worker_args): cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) - cmd += ' -D mapred.job.name=%d' % (a) + cmd += ' -D mapred.job.name=%s' % (args.jobname) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + worker_args)) fset = set() @@ -75,9 +75,10 @@ def hadoop_streaming(nworker, worker_args): for f in args.command: if os.path.exists(f): fset.add(f) - for flst in args.files: - for f in flst.split('#'): - fset.add(f) + if args.files != None: + for flst in args.files: + for f in flst.split('#'): + fset.add(f) for f in fset: cmd += ' -file %s' % f print cmd From 76c15dffdef6b0dbf792d111096a913c3bbe283c Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sun, 11 Jan 2015 00:16:05 +0800 Subject: [PATCH 172/531] remove blank --- tracker/rabit_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 30d21748f..e9b53afbd 100755 --- 
a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -12,7 +12,7 @@ import rabit_tracker as tracker #!!! Set path to hadoop and hadoop streaming jar here hadoop_binary = 'hadoop' -hadoop_streaming_jar = None +hadoop_streaming_jar = None # code hadoop_home = os.getenv('HADOOP_HOME') From 6b30fb2bea6475a1f3cf205d8632031173eebd1c Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 10 Jan 2015 09:58:10 -0800 Subject: [PATCH 173/531] update cache script --- tracker/rabit_hadoop.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index e9b53afbd..da9e161ce 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -41,7 +41,9 @@ parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type= help = 'whether automatically cache the files in the command to hadoop localfile, this is on by default') parser.add_argument('-f', '--files', nargs = '*', help = 'the cached file list in mapreduce,'\ - ' the submission script will automatically cache all the files which appears in command.'\ + ' the submission script will automatically cache all the files which appears in command to local folder'\ + ' This will also cause rewritten of all the file names in the command to current path,'\ + ' for example `../../kmeans ../kmeans.conf` will be rewritten to ./kmeans kmeans.conf because the two files are cached to running folder.'\ ' You may need this option to cache additional files.'\ ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', help = 'customize jobname in tracker') @@ -66,15 +68,20 @@ if args.jobname is None: args.jobname = ('Rabit(nworker=%d):' % args.nworker) + args.command[0].split('/')[-1]; def hadoop_streaming(nworker, worker_args): + fset = set() + if args.auto_file_cache: + for i in range(len(args.command)): + f = args.command[i] + if os.path.exists(f): + fset.add(f) + if i == 0: + args.command[i] 
= './' + args.command[i].split('/')[-1] + else: + args.command[i] = args.command[i].split('/')[-1] cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) cmd += ' -D mapred.job.name=%s' % (args.jobname) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + worker_args)) - fset = set() - if args.auto_file_cache: - for f in args.command: - if os.path.exists(f): - fset.add(f) if args.files != None: for flst in args.files: for f in flst.split('#'): From c2ab64afe3bf95a72ea579bc8505cb4481213955 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 10 Jan 2015 10:01:31 -0800 Subject: [PATCH 174/531] fix comment --- tracker/rabit_hadoop.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index da9e161ce..aaed47cb0 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -43,7 +43,8 @@ parser.add_argument('-f', '--files', nargs = '*', help = 'the cached file list in mapreduce,'\ ' the submission script will automatically cache all the files which appears in command to local folder'\ ' This will also cause rewritten of all the file names in the command to current path,'\ - ' for example `../../kmeans ../kmeans.conf` will be rewritten to ./kmeans kmeans.conf because the two files are cached to running folder.'\ + ' for example `../../kmeans ../kmeans.conf` will be rewritten to `./kmeans kmeans.conf`'\ + ' because the two files are cached to running folder.'\ ' You may need this option to cache additional files.'\ ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', help = 'customize jobname in tracker') From 500a57697d72ac738abc073d67ae215b680f4ba3 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 10 Jan 2015 17:45:53 -0800 Subject: [PATCH 175/531] chg script --- tracker/rabit_hadoop.py | 9 ++++++++- 1 file changed, 8 
insertions(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index aaed47cb0..f037387e4 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -48,6 +48,11 @@ parser.add_argument('-f', '--files', nargs = '*', ' You may need this option to cache additional files.'\ ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', help = 'customize jobname in tracker') +parser.add_argument('--timeout', default=600000000, type=int, + help = 'timeout of each mapper job, automatically set to a very long time normally you donot need to set this ') +parser.add_argument('-m', '--memory_mb', default=-1, type=int, + help = 'maximum memory used by the process, Guide: set it large (near mapred.cluster.max.map.memory.mb) if you are running multi-threading rabit,'\ + 'so that each node can occupy all the mapper slots in a machine for maximum performance') if hadoop_binary == None: parser.add_argument('-hb', '--hadoop_binary', required = True, help="path-to-hadoop binary folder") @@ -80,7 +85,9 @@ def hadoop_streaming(nworker, worker_args): else: args.command[i] = args.command[i].split('/')[-1] cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) - cmd += ' -D mapred.job.name=%s' % (args.jobname) + cmd += ' -Dmapred.job.name=%s' % (args.jobname) + cmd += ' -Dmapred.task.timeout=%d' % (args.timeout) + cmd += ' -Dmapred.job.map.memory.mb=%d' % (args.memory_mb) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + worker_args)) if args.files != None: From 43c129f431130fb8c5628cbc9565ca750f346d14 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 10 Jan 2015 17:49:09 -0800 Subject: [PATCH 176/531] chg script --- tracker/rabit_hadoop.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 
f037387e4..470455cb6 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -49,9 +49,11 @@ parser.add_argument('-f', '--files', nargs = '*', ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', help = 'customize jobname in tracker') parser.add_argument('--timeout', default=600000000, type=int, - help = 'timeout of each mapper job, automatically set to a very long time normally you donot need to set this ') + help = 'timeout of each mapper job, automatically set to a very long time,'\ + 'normally you do not need to set this ') parser.add_argument('-m', '--memory_mb', default=-1, type=int, - help = 'maximum memory used by the process, Guide: set it large (near mapred.cluster.max.map.memory.mb) if you are running multi-threading rabit,'\ + help = 'maximum memory used by the process, Guide: set it large (near mapred.cluster.max.map.memory.mb)'\ + 'if you are running multi-threading rabit,'\ 'so that each node can occupy all the mapper slots in a machine for maximum performance') if hadoop_binary == None: parser.add_argument('-hb', '--hadoop_binary', required = True, From 0100fdd18d67dbf365f10ee9623c97f24a566005 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 10 Jan 2015 21:21:39 -0800 Subject: [PATCH 177/531] auto jobname --- tracker/rabit_hadoop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 470455cb6..d0241dcb1 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -47,7 +47,7 @@ parser.add_argument('-f', '--files', nargs = '*', ' because the two files are cached to running folder.'\ ' You may need this option to cache additional files.'\ ' You can also use it to manually cache files when auto_file_cache is off') -parser.add_argument('--jobname', help = 'customize jobname in tracker') +parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker') 
parser.add_argument('--timeout', default=600000000, type=int, help = 'timeout of each mapper job, automatically set to a very long time,'\ 'normally you do not need to set this ') @@ -72,7 +72,7 @@ parser.add_argument('command', nargs='+', help = 'command for rabit program') args = parser.parse_args() -if args.jobname is None: +if args.jobname == 'auto': args.jobname = ('Rabit(nworker=%d):' % args.nworker) + args.command[0].split('/')[-1]; def hadoop_streaming(nworker, worker_args): From 3f4bf96c5d4c877790136fc174712217d151b879 Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sun, 11 Jan 2015 13:46:18 +0800 Subject: [PATCH 178/531] temp --- tracker/rabit_hadoop.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 470455cb6..809dfc1e4 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -12,7 +12,8 @@ import rabit_tracker as tracker #!!! Set path to hadoop and hadoop streaming jar here hadoop_binary = 'hadoop' -hadoop_streaming_jar = None +#hadoop_streaming_jar = None +hadoop_streaming_jar = '/home/likewise-open/APEXLAB/blchen/streaming.jar' # code hadoop_home = os.getenv('HADOOP_HOME') From 2e3361f0e03ee9cea2f74f94bfecfbf2ab98213a Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 10 Jan 2015 22:49:56 -0800 Subject: [PATCH 179/531] fix -f --- tracker/rabit_hadoop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 59866d55a..e91eb4135 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -39,7 +39,7 @@ parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, help = 'print more messages into the console') parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type=int, help = 'whether automatically cache the files in the command to hadoop localfile, this is on by default') -parser.add_argument('-f', '--files', nargs = '*', 
+parser.add_argument('-f', '--files', default = [], action='append', help = 'the cached file list in mapreduce,'\ ' the submission script will automatically cache all the files which appears in command.'\ ' You may need this option to cache additional files.'\ @@ -63,7 +63,7 @@ parser.add_argument('command', nargs='+', args = parser.parse_args() if args.jobname is None: - args.jobname = ('Rabit(nworker=%d):' % args.nworker) + args.command[0].split('/')[-1]; + args.jobname = ('Rabit[nworker=%d]:' % args.nworker) + args.command[0].split('/')[-1]; def hadoop_streaming(nworker, worker_args): cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) From 7fa23f2d2f25aeadb35fe907a22a7a2b672440bf Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sun, 11 Jan 2015 14:52:48 +0800 Subject: [PATCH 180/531] modify default jobname --- tracker/rabit_hadoop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index a283e0ca7..20bb47e7f 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -50,7 +50,7 @@ parser.add_argument('-f', '--files', nargs = '*', ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker') parser.add_argument('--timeout', default=600000000, type=int, - help = 'timeout of each mapper job, automatically set to a very long time,'\ + help = 'timeout ((in milli seconds)) of each mapper job, automatically set to a very long time,'\ 'normally you do not need to set this ') parser.add_argument('-m', '--memory_mb', default=-1, type=int, help = 'maximum memory used by the process, Guide: set it large (near mapred.cluster.max.map.memory.mb)'\ @@ -74,7 +74,7 @@ parser.add_argument('command', nargs='+', args = parser.parse_args() if args.jobname == 'auto': - args.jobname = ('Rabit(nworker=%d):' % args.nworker) + args.command[0].split('/')[-1]; 
+ args.jobname = ('Rabit[nworker=%d]:' % args.nworker) + args.command[0].split('/')[-1]; def hadoop_streaming(nworker, worker_args): fset = set() From 80b0d06b7e3ba9802da762d07b80e60230015071 Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sun, 11 Jan 2015 14:56:20 +0800 Subject: [PATCH 181/531] merger from tqchen --- tracker/rabit_hadoop.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 135f13df5..7808f9143 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -12,8 +12,7 @@ import rabit_tracker as tracker #!!! Set path to hadoop and hadoop streaming jar here hadoop_binary = 'hadoop' -#hadoop_streaming_jar = None -hadoop_streaming_jar = '/home/likewise-open/APEXLAB/blchen/streaming.jar' +hadoop_streaming_jar = None # code hadoop_home = os.getenv('HADOOP_HOME') @@ -50,7 +49,7 @@ parser.add_argument('-f', '--files', default = [], action='append', ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker') parser.add_argument('--timeout', default=600000000, type=int, - help = 'timeout ((in milli seconds)) of each mapper job, automatically set to a very long time,'\ + help = 'timeout (in milli seconds) of each mapper job, automatically set to a very long time,'\ 'normally you do not need to set this ') parser.add_argument('-m', '--memory_mb', default=-1, type=int, help = 'maximum memory used by the process, Guide: set it large (near mapred.cluster.max.map.memory.mb)'\ From c6d0be57d499f44872425b67fb217f63e8221709 Mon Sep 17 00:00:00 2001 From: Boliang Chen Date: Sun, 11 Jan 2015 15:39:50 +0800 Subject: [PATCH 182/531] explain timeout --- tracker/rabit_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 7808f9143..3d5977e14 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ 
-49,7 +49,7 @@ parser.add_argument('-f', '--files', default = [], action='append', ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker') parser.add_argument('--timeout', default=600000000, type=int, - help = 'timeout (in milli seconds) of each mapper job, automatically set to a very long time,'\ + help = 'timeout (in million seconds) of each mapper job, automatically set to a very long time,'\ 'normally you do not need to set this ') parser.add_argument('-m', '--memory_mb', default=-1, type=int, help = 'maximum memory used by the process, Guide: set it large (near mapred.cluster.max.map.memory.mb)'\ From aea4c10847c05a103ad33359b76d4a544f6b7075 Mon Sep 17 00:00:00 2001 From: nachocano Date: Sun, 11 Jan 2015 01:07:51 -0800 Subject: [PATCH 183/531] cosmetic changes to tutorial --- README.md | 2 +- guide/README.md | 121 ++++++++++++++++++++++++------------------------ 2 files changed, 61 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 32fdc6fd2..b035b7437 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ rabit is a light weight library that provides a fault tolerant interface of Allreduce and Broadcast. It is designed to support easy implementations of distributed machine learning programs, many of which fall naturally under the Allreduce abstraction. -* [Tutorial of Rabit](guide) +* [Tutorial](guide) * [API Documentation](http://homes.cs.washington.edu/~tqchen/rabit/doc) * You can also directly read the [interface header](include/rabit.h) diff --git a/guide/README.md b/guide/README.md index 2ea6dccc8..8764ae9a1 100644 --- a/guide/README.md +++ b/guide/README.md @@ -1,15 +1,15 @@ -Tutorial of Rabit +Tutorial ===== -This is an tutorial of rabit, a ***Reliable Allreduce and Broadcast interface***. -To run the examples locally, you will need to type ```make``` to build all the examples. 
+This is rabit's tutorial, a ***Reliable Allreduce and Broadcast Interface***. +To run the examples locally, you will need to build them with ```make```. -Please also refer to the [API Documentation](http://homes.cs.washington.edu/~tqchen/rabit/doc) +Please also refer to the [API Documentation](http://homes.cs.washington.edu/~tqchen/rabit/doc) for further details. **List of Topics** * [What is Allreduce](#what-is-allreduce) * [Common Use Case](#common-use-case) -* [Structure of Rabit Program](#structure-of-rabit-program) +* [Structure of a Rabit Program](#structure-of-rabit-program) * [Compile Programs with Rabit](#compile-programs-with-rabit) * [Running Rabit Jobs](#running-rabit-jobs) - [Running Rabit on Hadoop](#running-rabit-on-hadoop) @@ -19,8 +19,8 @@ Please also refer to the [API Documentation](http://homes.cs.washington.edu/~tqc What is Allreduce ===== -The main method provided by rabit are Allreduce and Broadcast. Allreduce performs reduction across different computation nodes, -and returning the results to all the nodes. To understand the behavior of the function. Consider the following example in [basic.cc](basic.cc). +The main methods provided by rabit are Allreduce and Broadcast. Allreduce performs reduction across different computation nodes, +and returns the result to every node. To understand the behavior of the function, consider the following example in [basic.cc](basic.cc). ```c++ #include using namespace rabit; @@ -41,24 +41,24 @@ int main(int argc, char *argv[]) { return 0; } ``` -You can run the example using the rabit_demo.py script. The following commmand -start rabit program with two worker processes. +You can run the example using the rabit_demo.py script. The following command +starts the rabit program with two worker processes. ```bash ../tracker/rabit_demo.py -n 2 basic.rabit ``` -This will start two process, one process with rank 0 and another rank 1, running the same code. 
-The ```rabit::GetRank()``` function return the rank of current process. +This will start two processes, one process with rank 0 and the other with rank 1, both processes run the same code. +The ```rabit::GetRank()``` function returns the rank of current process. -Before the call the allreduce, process 0 contains array ```a = {0, 1, 2}```, while process 1 have array -```a = {1, 2, 3}```. After the call of Allreduce, the array contents in all processes are replaced by the -reduction result (in this case, the maximum value in each position across all the processes). So after the +Before the call to Allreduce, process 0 contains the array ```a = {0, 1, 2}```, while process 1 has the array +```a = {1, 2, 3}```. After the call to Allreduce, the array contents in all processes are replaced by the +reduction result (in this case, the maximum value in each position across all the processes). So, after the Allreduce call, the result will become ```a = {1, 2, 3}```. -Rabit provides different reduction operators, for example, you can change ```op::Max``` to ```op::Sum```, -then the reduction operation will become the summation, and the result will become ```a = {1, 3, 5}```. -You can also run example with different processes by setting -n to different values, to see the outcomming result. +Rabit provides different reduction operators, for example, if you change ```op::Max``` to ```op::Sum```, +the reduction operation will be a summation, and the result will become ```a = {1, 3, 5}```. +You can also run the example with different processes by setting -n to different values. -Broadcast is another method provided by rabit besides Allreduce, this function allows one node to broadcast its -local data to all the other nodes. The following code in [broadcast.cc](broadcast.cc) broadcast a string from +Broadcast is another method provided by rabit besides Allreduce. This function allows one node to broadcast its +local data to all other nodes. 
The following code in [broadcast.cc](broadcast.cc) broadcasts a string from node 0 to all other nodes. ```c++ #include @@ -78,30 +78,29 @@ int main(int argc, char *argv[]) { return 0; } ``` -You can run the program by the following command, using three workers. +The following command starts the program with three worker processes. ```bash ../tracker/rabit_demo.py -n 3 broadcast.rabit ``` -Besides string, rabit also allows broadcast of constant size array and vector. +Besides strings, rabit also allows to broadcast constant size array and vectors. Common Use Case ===== -Many distributed machine learning algorithm involves dividing the data into each node, -compute statistics locally and aggregates them together. Such process is usually done repeatively in -many iterations before the algorithm converge. Allreduce naturally meets the need of such programs, +Many distributed machine learning algorithms involve splitting the data into different nodes, +computing statistics locally, and finally aggregating them. Such workflow is usually done repetitively through +many iterations before the algorithm converges. Allreduce naturally meets the structure of such programs, common use cases include: * Aggregation of gradient values, which can be used in optimization methods such as L-BFGS. -* Aggregation of other statistics, which can be used in KMeans and Gaussian Mixture Model. +* Aggregation of other statistics, which can be used in KMeans and Gaussian Mixture Models. * Find the best split candidate and aggregation of split statistics, used for tree based models. -The main purpose of Rabit is to provide reliable and portable library for distributed machine learning programs. -So that the program can be run reliably on different types of platforms. +Rabit is a reliable and portable library for distributed machine learning programs, that allow programs to run reliably on different platforms. 
-Structure of Rabit Program +Structure of a Rabit Program ===== -The following code illustrates the common structure of rabit program. This is an abstract example, -you can also refer to [kmeans.cc](../toolkit/kmeans.cc) for an example implementation of kmeans. +The following code illustrates the common structure of a rabit program. This is an abstract example, +you can also refer to [kmeans.cc](../toolkit/kmeans.cc) for an example implementation of kmeans algorithm. ```c++ #include @@ -127,65 +126,65 @@ int main(int argc, char *argv[]) { } ``` -Besides the common Allreduce and Broadcast function, there are two additional functions: ```CheckPoint``` -and ```CheckPoint```. These two functions are used for fault-tolerance purpose. -Common machine learning programs involves several iterations. In each iteration, we start from a model, do some calls -to Allreduce or Broadcasts and update the model to a new one. The calling sequence in each iteration does not need to be the same. +Besides the common Allreduce and Broadcast functions, there are two additional functions: ```LoadCheckPoint``` +and ```CheckPoint```. These two functions are used for fault-tolerance purposes. +As mentioned before, traditional machine learning programs involve several iterations. In each iteration, we start with a model, make some calls +to Allreduce or Broadcast and update the model. The calling sequence in each iteration does not need to be the same. -* When the nodes start from beginning, LoadCheckPoint returns 0, and we can initialize the model. +* When the nodes start from the beginning (i.e. iteration 0), LoadCheckPoint returns 0, so we can initialize the model. * ```CheckPoint``` saves the model after each iteration. 
- - Efficiency Note: the model is only kept in local memory and no save to disk is involved in Checkpoint + - Efficiency Note: the model is only kept in local memory and no save to disk is performed when calling Checkpoint * When a node goes down and restarts, ```LoadCheckPoint``` will recover the latest saved model, and -* When a node goes down, the rest of the node will block in the call of Allreduce/Broadcast and helps - the recovery of the failure nodes, util it catches up. +* When a node goes down, the rest of the nodes will block in the call of Allreduce/Broadcast and wait for + the recovery of the failed node until it catches up. -Please also see the section of [fault tolerance procedure](#fault-tolerance) in rabit to understand the recovery procedure under going in rabit +Please see the [fault tolerance procedure](#fault-tolerance) section to understand the recovery procedure executed by rabit. Compile Programs with Rabit ==== Rabit is a portable library, to use it, you only need to include the rabit header file. 
-* You will need to add path to [../include](../include) to the header search path of compiler +* You will need to add the path to [../include](../include) to the header search path of the compiler - Solution 1: add ```-I/path/to/rabit/include``` to the compiler flag in gcc or clang - - Solution 2: add the path to enviroment variable CPLUS_INCLUDE_PATH -* You will need to add path to [../lib](../lib) to the library search path of compiler + - Solution 2: add the path to the environment variable CPLUS_INCLUDE_PATH +* You will need to add the path to [../lib](../lib) to the library search path of the compiler - Solution 1: add ```-L/path/to/rabit/lib``` to the linker flag - - Solution 2: add the path to enviroment variable LIBRARY_PATH AND LD_LIBRARY_PATH + - Solution 2: add the path to environment variable LIBRARY_PATH AND LD_LIBRARY_PATH * Link against lib/rabit.a - - Add ```-lrabit``` to linker flag + - Add ```-lrabit``` to the linker flag -The procedure above allows you to compile a program with rabit. The following two sections are additional -advanced options you can take to link against different backend other than the normal one. +The procedure above allows you to compile a program with rabit. The following two sections contain additional +options you can use to link against different backends other than the normal one. #### Link against MPI Allreduce -You can link against ```rabit_mpi.a``` instead to use MPI Allreduce, however, the resulting program is backed by MPI and +You can link against ```rabit_mpi.a``` instead of using MPI Allreduce, however, the resulting program is backed by MPI and is not fault tolerant anymore. 
-* Simply change linker flag from ```-lrabit``` to ```-lrabit_mpi``` +* Simply change the linker flag from ```-lrabit``` to ```-lrabit_mpi``` * The final linking needs to be done by mpi wrapper compiler ```mpicxx``` #### Link against Mock Test Rabit Library -If you want to mock test the program to see the behavior of the code when some nodes goes down. You can link against ```rabit_mock.a``` . -* Simply change linker flag from ```-lrabit``` to ```-lrabit_mock``` +If you want to use a mock to test the program in order to see the behavior of the code when some nodes go down, you can link against ```rabit_mock.a``` . +* Simply change the linker flag from ```-lrabit``` to ```-lrabit_mock``` -The resulting rabit program can take in additional arguments in format of +The resulting rabit mock program can take in additional arguments in the following format ``` -mock=rank,version,seq,ndeath +mock=rank,version,seq,ndeath ``` -The four integers specifies an event that will cause the program to suicide(exit with -2) -* rank specifies the rank of the node -* version specifies the current version(iteration) of the model -* seq specifies the sequence number of Allreduce/Broadcast call since last checkpoint +The four integers specify an event that will cause the program to ```commit suicide```(exit with -2) +* rank specifies the rank of the node to kill +* version specifies the version (iteration) of the model where you want the process to die +* seq specifies the sequence number of the Allreduce/Broadcast call since last checkpoint, where the process will be killed * ndeath specifies how many times this node died already -For example, consider the following script in the test case +For example, consider the following script in the test case ```bash ../tracker/rabit_demo.py -n 10 test_model_recover 10000\ mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 ``` -* The first mock will cause node 0 to exit when calling second Allreduce/Broadcast (seq = 1) in iteration 0 -* The second mock will 
cause node 1 to exit when calling second Allreduce/Broadcast (seq = 1) in iteration 1 -* The second mock will cause node 0 to exit again when calling second Allreduce/Broadcast (seq = 1) in iteration 1 - - Note that ndeath = 1 means this will happen only if node 0 died once, which is our case +* The first mock will cause node 0 to exit when calling the second Allreduce/Broadcast (seq = 1) in iteration 0 +* The second mock will cause node 1 to exit when calling the second Allreduce/Broadcast (seq = 1) in iteration 1 +* The third mock will cause node 1 to exit again when calling second Allreduce/Broadcast (seq = 1) in iteration 1 + - Note that ndeath = 1 means this will happen only if node 1 died once, which is our case Running Rabit Jobs ==== From eef79067a80dd59d2073f0af5d5e549c1de046d0 Mon Sep 17 00:00:00 2001 From: nachocano Date: Sun, 11 Jan 2015 01:31:10 -0800 Subject: [PATCH 184/531] more cosmetic stuff --- guide/README.md | 68 ++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/guide/README.md b/guide/README.md index 8764ae9a1..4db7ef4cf 100644 --- a/guide/README.md +++ b/guide/README.md @@ -87,8 +87,7 @@ Besides strings, rabit also allows to broadcast constant size array and vectors. Common Use Case ===== Many distributed machine learning algorithms involve splitting the data into different nodes, -computing statistics locally, and finally aggregating them. Such workflow is usually done repetitively through -many iterations before the algorithm converges. Allreduce naturally meets the structure of such programs, +computing statistics locally, and finally aggregating them. Such workflow is usually done repetitively through many iterations before the algorithm converges. Allreduce naturally meets the structure of such programs, common use cases include: * Aggregation of gradient values, which can be used in optimization methods such as L-BFGS. @@ -131,14 +130,14 @@ and ```CheckPoint```. 
These two functions are used for fault-tolerance purposes. As mentioned before, traditional machine learning programs involve several iterations. In each iteration, we start with a model, make some calls to Allreduce or Broadcast and update the model. The calling sequence in each iteration does not need to be the same. -* When the nodes start from the beginning (i.e. iteration 0), LoadCheckPoint returns 0, so we can initialize the model. +* When the nodes start from the beginning (i.e. iteration 0), ```LoadCheckPoint``` returns 0, so we can initialize the model. * ```CheckPoint``` saves the model after each iteration. - Efficiency Note: the model is only kept in local memory and no save to disk is performed when calling Checkpoint * When a node goes down and restarts, ```LoadCheckPoint``` will recover the latest saved model, and * When a node goes down, the rest of the nodes will block in the call of Allreduce/Broadcast and wait for the recovery of the failed node until it catches up. -Please see the [fault tolerance procedure](#fault-tolerance) section to understand the recovery procedure executed by rabit. +Please see the [Fault Tolerance](#fault-tolerance) section to understand the recovery procedure executed by rabit. Compile Programs with Rabit ==== @@ -191,73 +190,72 @@ Running Rabit Jobs Rabit is a portable library that can run on multiple platforms. 
#### Running Rabit Locally -* You can use [../tracker/rabit_demo.py](../tracker/rabit_demo.py) to start n process locally +* You can use [../tracker/rabit_demo.py](../tracker/rabit_demo.py) to start n processes locally * This script will restart the program when it exits with -2, so it can be used for [mock test](#link-against-mock-test-library) #### Running Rabit on Hadoop -* You can use [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) to run rabit program on hadoop -* This will start n rabit program as mapper of MapReduce -* Each program can read its part of data from stdin -* Yarn is highly recommended, since Yarn allows specifying ncpu and memory of each mapper +* You can use [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) to run rabit programs on hadoop +* This will start n rabit programs as mappers of MapReduce +* Each program can read its portion of data from stdin +* Yarn is highly recommended, since Yarn allows specifying number of cpus and memory of each mapper: - This allows multi-threading programs in each node, which can be more efficient - - A good possible practice is OpenMP-rabit hybrid code + - An easy multi-threading solution could be to use OpenMP with rabit code #### Running Rabit on Yarn * To Be modified from [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) #### Running Rabit using MPI -* You can submit rabit programs to MPI cluster using [../tracker/rabit_mpi.py](../tracker/rabit_mpi.py). +* You can submit rabit programs to an MPI cluster using [../tracker/rabit_mpi.py](../tracker/rabit_mpi.py). * If you linked your code against librabit_mpi.a, then you can directly use mpirun to submit the job #### Customize Tracker Script -You can also modify the tracker script to allow rabit run on other platforms. 
To do so, refer to the existing -tracker script such as [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) and [../tracker/rabit_mpi.py](../tracker/rabit_mpi.py) +You can also modify the tracker script to allow rabit to run on other platforms. To do so, refer to existing +tracker scripts, such as [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) and [../tracker/rabit_mpi.py](../tracker/rabit_mpi.py) to get a sense of how it is done. You will need to implement a platform dependent submission function with the following definition ```python -def fun_submit(nslave, slave_args): +def fun_submit(nworkers, worker_args): """ - customized submit script, that submit nslave jobs, + customized submit script, that submits nslave jobs, each must contain args as parameter note this can be a lambda closure Parameters - nslave number of slave process to start up + nworkers number of worker processes to start worker_args tracker information which must be passed to the arguments - this usually includes the parameters of master_uri and port etc. + this usually includes the parameters of master_uri and port, etc. """ ``` -The submission function should start nslave process in the platform, and append slave_args to the end of other arguments. -Then we can simply call ```tracker.submit``` with fun_submit to submit jobs in the target platform +The submission function should start nworkers processes in the platform, and append worker_args to the end of the other arguments. +Then you can simply call ```tracker.submit``` with fun_submit to submit jobs to the target platform -Note that the current rabit tracker do not restart a worker when it dies, the job of fail-restart thus lies on the platform itself or we should write -fail-restart logic in the customization script. +Note that the current rabit tracker does not restart a worker when it dies, the restart of a node is done by the platform, otherwise we should write the fail-restart logic in the custom script. 
* Fail-restart is usually provided by most platforms. * For example, mapreduce will restart a mapper when it fails Fault Tolerance ===== -This section introduces the how fault tolerance works in rabit. -We can use the following figure to show the how rabit deals with failures. +This section introduces how fault tolerance works in rabit. +The following figure shows how rabit deals with failures. ![](http://homes.cs.washington.edu/~tqchen/rabit/fig/fault-tol.png) The scenario is as follows: -* Node 1 fails between the first and second call of Allreduce after the latest checkpoint -* Other nodes stay in the call of second Allreduce to help node 1 to recover. +* Node 1 fails between the first and second call of Allreduce after the second checkpoint +* The other nodes wait in the call of the second Allreduce in order to help node 1 to recover. * When node 1 restarts, it will call ```LoadCheckPoint```, and get the latest checkpoint from one of the existing nodes. * Then node 1 can start from the latest checkpoint and continue running. -* When node 1 call the first Allreduce again, because the other nodes already knows the result of allreduce, node 1 can get the result from one of the nodes. -* When node 1 reaches the second Allreduce, other nodes find out that node 1 has catched up and they can continue the program normally. +* When node 1 calls the first Allreduce again, as the other nodes already know the result, node 1 can get it from one of them. +* When node 1 reaches the second Allreduce, the other nodes find out that node 1 has catched up and they can continue the program normally. -We can find that this fault tolerance model is based on the a key property of Allreduce and Broadcast: -All the nodes get the same result when calling Allreduce/Broadcast. Because of this property, we can have some node records the history, -and when a node recovers, the result can be forwarded to the recovering node. 
+This fault tolerance model is based on a key property of Allreduce and Broadcast: +All the nodes get the same result when calling Allreduce/Broadcast. Because of this property, any node can record the history, +and when a node recovers, the result can be forwarded to it. -The checkpoint is introduced so that we do not have to discard the history before the checkpoint, so that the iterative program can be more -efficient. The strategy of rabit is different from fail-restart strategy where all the nodes restarts from checkpoint -when any of the node fails. All the program only block in the Allreduce call to help the recovery, and the checkpoint is only saved locally without -touching the disk. This makes rabit program more reliable and efficient. +The checkpoint is introduced so that we do not have to discard the history before checkpointing, so that the iterative program can be more +efficient. The strategy of rabit is different from the fail-restart strategy where all the nodes restart from the same checkpoint +when any of them fails. All the processes block in the Allreduce call to help the recovery, and the checkpoint is only saved locally without +touching the disk. This makes rabit programs more reliable and efficient. -This is an conceptual introduction to the fault tolerant model of rabit. The actual implementation is more sophiscated, +This is just a conceptual introduction to rabit's fault tolerance model. The actual implementation is more sophisticated, and can deal with more complicated cases such as multiple nodes failure and node failure during recovery phase. 
From 2d97833f4878f56961f0a67b50a90ba251a3078d Mon Sep 17 00:00:00 2001 From: nachocano Date: Sun, 11 Jan 2015 01:35:04 -0800 Subject: [PATCH 185/531] slightly change --- guide/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/guide/README.md b/guide/README.md index 4db7ef4cf..93f2f210f 100644 --- a/guide/README.md +++ b/guide/README.md @@ -251,9 +251,9 @@ This fault tolerance model is based on a key property of Allreduce and Broadcast All the nodes get the same result when calling Allreduce/Broadcast. Because of this property, any node can record the history, and when a node recovers, the result can be forwarded to it. -The checkpoint is introduced so that we do not have to discard the history before checkpointing, so that the iterative program can be more +The checkpoint is introduced so that we can discard the history after checkpointing, so that the iterative program can be more efficient. The strategy of rabit is different from the fail-restart strategy where all the nodes restart from the same checkpoint -when any of them fails. All the processes block in the Allreduce call to help the recovery, and the checkpoint is only saved locally without +when any of them fail. All the processes will block in the Allreduce call to help the recovery, and the checkpoint is only saved locally without touching the disk. This makes rabit programs more reliable and efficient. This is just a conceptual introduction to rabit's fault tolerance model. 
The actual implementation is more sophisticated, From d269cb9c50927779eea070c5eafe04ca3061113f Mon Sep 17 00:00:00 2001 From: nachocano Date: Sun, 11 Jan 2015 01:43:32 -0800 Subject: [PATCH 186/531] guide stuff --- guide/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/guide/README.md b/guide/README.md index 93f2f210f..a2ef94160 100644 --- a/guide/README.md +++ b/guide/README.md @@ -251,8 +251,7 @@ This fault tolerance model is based on a key property of Allreduce and Broadcast All the nodes get the same result when calling Allreduce/Broadcast. Because of this property, any node can record the history, and when a node recovers, the result can be forwarded to it. -The checkpoint is introduced so that we can discard the history after checkpointing, so that the iterative program can be more -efficient. The strategy of rabit is different from the fail-restart strategy where all the nodes restart from the same checkpoint +The checkpoint is introduced so that we can discard the history after checkpointing, this makes the iterative program more efficient. The strategy of rabit is different from the fail-restart strategy where all the nodes restart from the same checkpoint when any of them fail. All the processes will block in the Allreduce call to help the recovery, and the checkpoint is only saved locally without touching the disk. This makes rabit programs more reliable and efficient. 
From d81fb6a9e6f3650d46e53a061cf9e5ede0c119a5 Mon Sep 17 00:00:00 2001 From: chenshuaihua Date: Sun, 11 Jan 2015 21:59:38 +0800 Subject: [PATCH 187/531] test --- tracker/rabit_yarn.py | 89 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 tracker/rabit_yarn.py diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py new file mode 100644 index 000000000..2f896b2af --- /dev/null +++ b/tracker/rabit_yarn.py @@ -0,0 +1,89 @@ +#!/usr/bin/python +""" +This is a script to submit rabit job using hadoop streaming +submit the rabit process as mappers of MapReduce +""" +import argparse +import sys +import os +import time +import subprocess +import rabit_tracker as tracker + +#!!! Set path to hadoop and hadoop streaming jar here +hadoop_binary = 'hadoop' +hadoop_streaming_jar = None + +# code +hadoop_home = os.getenv('HADOOP_HOME') +if hadoop_home != None: + if hadoop_binary == None: + hadoop_binary = hadoop_home + '/bin/hadoop' + assert os.path.exists(hadoop_binary), "HADDOP_HOME does not contain the hadoop binary" + if hadoop_streaming_jar == None: + hadoop_streaming_jar = hadoop_home + '/lib/hadoop-streaming.jar' + assert os.path.exists(hadoop_streaming_jar), "HADDOP_HOME does not contain the haddop streaming jar" + +if hadoop_binary == None or hadoop_streaming_jar == None: + print 'Warning: Cannot auto-detect path to hadoop and streaming jar, need to set them via arguments -hs and -hb' + print '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14' + +parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using Hadoop Streaming') +parser.add_argument('-nw', '--nworker', required=True, type=int, + help = 'number of worker proccess to be launched') +parser.add_argument('-nt', '--nthread', required=True, type=int, + help = 'number of thread of each mapper to be launched') +parser.add_argument('-i', '--input', required=True, + help = 'input path in 
HDFS') +parser.add_argument('-o', '--output', required=True, + help = 'output path in HDFS') +parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, + help = 'print more messages into the console') +parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type=int, + help = 'whether automatically cache the files in the command to hadoop localfile, this is on by default') +parser.add_argument('-f', '--files', nargs = '*', + help = 'the cached file list in mapreduce,'\ + ' the submission script will automatically cache all the files which appears in command.'\ + ' You may need this option to cache additional files.'\ + ' You can also use it to manually cache files when auto_file_cache is off') +parser.add_argument('--jobname', help = 'customize jobname in tracker') +if hadoop_binary == None: + parser.add_argument('-hb', '--hadoop_binary', required = True, + help="path-to-hadoop binary folder") +else: + parser.add_argument('-hb', '--hadoop_binary', default = hadoop_binary, + help="path-to-hadoop binary folder") + +if hadoop_streaming_jar == None: + parser.add_argument('-jar', '--hadoop_streaming_jar', required = True, + help='path-to hadoop streamimg jar file') +else: + parser.add_argument('-jar', '--hadoop_streaming_jar', default = hadoop_streaming_jar, + help='path-to hadoop streamimg jar file') +parser.add_argument('command', nargs='+', + help = 'command for rabit program') +args = parser.parse_args() + +if args.jobname is None: + args.jobname = ('Rabit(nworker=%d):' % args.nworker) + args.command[0].split('/')[-1]; + +def hadoop_streaming(nworker, slave_args): + cmd = '%s jar %s -D mapreduce.job.maps=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) + cmd += ' -D mapreduce.job.name=%s' % (args.jobname) + cmd += ' -D mapreduce.map.cpu.vcores=%d' % (args.nthread) + cmd += ' -input %s -output %s' % (args.input, args.output) + cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) + 
fset = set() + if args.auto_file_cache: + for f in args.command: + if os.path.exists(f): + fset.add(f) + for flst in args.files: + for f in flst.split('#'): + fset.add(f) + for f in fset: + cmd += ' -file %s' % f + print cmd + subprocess.check_call(cmd, shell = True) + +tracker.submit(args.nworker, [], fun_submit = hadoop_streaming, verbose = args.verbose) From b5ac85f103033a02802a90faf8361d700931dcc9 Mon Sep 17 00:00:00 2001 From: chenshuaihua Date: Sun, 11 Jan 2015 23:19:04 +0800 Subject: [PATCH 188/531] yarn script --- tracker/rabit_yarn.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py index 2f896b2af..9922b55d2 100644 --- a/tracker/rabit_yarn.py +++ b/tracker/rabit_yarn.py @@ -46,7 +46,14 @@ parser.add_argument('-f', '--files', nargs = '*', ' the submission script will automatically cache all the files which appears in command.'\ ' You may need this option to cache additional files.'\ ' You can also use it to manually cache files when auto_file_cache is off') -parser.add_argument('--jobname', help = 'customize jobname in tracker') +parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker') +parser.add_argument('--timeout', default=600000000, type=int, + help = 'timeout (in million seconds) of each mapper job, automatically set to a very long time,'\ + 'normally you do not need to set this ') +parser.add_argument('-m', '--memory_mb', default=1024, type=int, + help = 'maximum memory used by the process, Guide: set it large (near mapreduce.jobtracker.maxmapmemory.mb).'\ + 'if you are running multi-threading rabit,'\ + 'so that each node can occupy all the mapper slots in a machine for maximum performance') if hadoop_binary == None: parser.add_argument('-hb', '--hadoop_binary', required = True, help="path-to-hadoop binary folder") @@ -71,6 +78,8 @@ def hadoop_streaming(nworker, slave_args): cmd = '%s jar %s -D mapreduce.job.maps=%d' % (args.hadoop_binary, 
args.hadoop_streaming_jar, nworker) cmd += ' -D mapreduce.job.name=%s' % (args.jobname) cmd += ' -D mapreduce.map.cpu.vcores=%d' % (args.nthread) + cmd += ' -D mapreduce.task.timeout=%d' % (args.timeout) + cmd += ' -D mapreduce.map.memory.mb=%d' % (args.memory_mb) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) fset = set() From 5e843cfbbd58a9d77d837d6f5711b2a30a9fa89a Mon Sep 17 00:00:00 2001 From: chenshuaihua Date: Sun, 11 Jan 2015 23:22:26 +0800 Subject: [PATCH 189/531] yarn script --- tracker/rabit_yarn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py index 9922b55d2..76af354e3 100644 --- a/tracker/rabit_yarn.py +++ b/tracker/rabit_yarn.py @@ -78,8 +78,8 @@ def hadoop_streaming(nworker, slave_args): cmd = '%s jar %s -D mapreduce.job.maps=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) cmd += ' -D mapreduce.job.name=%s' % (args.jobname) cmd += ' -D mapreduce.map.cpu.vcores=%d' % (args.nthread) - cmd += ' -D mapreduce.task.timeout=%d' % (args.timeout) cmd += ' -D mapreduce.map.memory.mb=%d' % (args.memory_mb) + cmd += ' -D mapreduce.task.timeout=%d' % (args.timeout) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) fset = set() From 981f69ff55b3f13bd3f65e32dfa2fd4859017b29 Mon Sep 17 00:00:00 2001 From: chenshuaihua Date: Sun, 11 Jan 2015 23:23:58 +0800 Subject: [PATCH 190/531] yarn script --- tracker/rabit_yarn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py index 76af354e3..bc794ba1b 100644 --- a/tracker/rabit_yarn.py +++ b/tracker/rabit_yarn.py @@ -79,7 +79,7 @@ def hadoop_streaming(nworker, slave_args): cmd += ' -D mapreduce.job.name=%s' % (args.jobname) cmd += ' -D mapreduce.map.cpu.vcores=%d' % (args.nthread) cmd += ' 
-D mapreduce.map.memory.mb=%d' % (args.memory_mb) - cmd += ' -D mapreduce.task.timeout=%d' % (args.timeout) +cmd += ' -D mapreduce.task.timeout=%d' % (args.timeout) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) fset = set() From 00323f462aaf7156805ba4b40272d9e36afc7eca Mon Sep 17 00:00:00 2001 From: chenshuaihua Date: Sun, 11 Jan 2015 23:32:14 +0800 Subject: [PATCH 191/531] yarn script --- tracker/rabit_yarn.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py index bc794ba1b..3ba8ddfc8 100644 --- a/tracker/rabit_yarn.py +++ b/tracker/rabit_yarn.py @@ -74,12 +74,14 @@ args = parser.parse_args() if args.jobname is None: args.jobname = ('Rabit(nworker=%d):' % args.nworker) + args.command[0].split('/')[-1]; + + def hadoop_streaming(nworker, slave_args): cmd = '%s jar %s -D mapreduce.job.maps=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) - cmd += ' -D mapreduce.job.name=%s' % (args.jobname) - cmd += ' -D mapreduce.map.cpu.vcores=%d' % (args.nthread) + cmd += ' -D mapreduce.job.name' % (args.jobname) + cmd += ' -D mapreduce.task.timeout=%d' % (args.timeout) cmd += ' -D mapreduce.map.memory.mb=%d' % (args.memory_mb) -cmd += ' -D mapreduce.task.timeout=%d' % (args.timeout) + cmd += ' -D mapreduce.map.cpu.vcores=%d' % (args.nthread) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) fset = set() From 26b5fdac40aae50a4e9a38a5fffc6c80da6553cd Mon Sep 17 00:00:00 2001 From: chenshuaihua Date: Sun, 11 Jan 2015 23:54:31 +0800 Subject: [PATCH 192/531] yarn script --- tracker/rabit_yarn.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py index 3ba8ddfc8..b5bd53677 100644 --- a/tracker/rabit_yarn.py +++ b/tracker/rabit_yarn.py @@ 
-71,10 +71,9 @@ parser.add_argument('command', nargs='+', help = 'command for rabit program') args = parser.parse_args() -if args.jobname is None: - args.jobname = ('Rabit(nworker=%d):' % args.nworker) + args.command[0].split('/')[-1]; - - +if args.jobname == 'auto': + args.jobname = ('Rabit[nworker=%d]:' % args.nworker) + args.command[0].split('/')[-1]; + def hadoop_streaming(nworker, slave_args): cmd = '%s jar %s -D mapreduce.job.maps=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) From b2dec958621cf5384f67f467604c1dcf1cfe1fe6 Mon Sep 17 00:00:00 2001 From: chenshuaihua Date: Mon, 12 Jan 2015 00:09:00 +0800 Subject: [PATCH 193/531] yarn script --- tracker/rabit_yarn.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py index b5bd53677..0b542c099 100644 --- a/tracker/rabit_yarn.py +++ b/tracker/rabit_yarn.py @@ -21,7 +21,7 @@ if hadoop_home != None: hadoop_binary = hadoop_home + '/bin/hadoop' assert os.path.exists(hadoop_binary), "HADDOP_HOME does not contain the hadoop binary" if hadoop_streaming_jar == None: - hadoop_streaming_jar = hadoop_home + '/lib/hadoop-streaming.jar' + hadoop_streaming_jar = hadoop_home + '/lib/hadoop-streaming.jar' assert os.path.exists(hadoop_streaming_jar), "HADDOP_HOME does not contain the haddop streaming jar" if hadoop_binary == None or hadoop_streaming_jar == None: @@ -48,12 +48,14 @@ parser.add_argument('-f', '--files', nargs = '*', ' You can also use it to manually cache files when auto_file_cache is off') parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker') parser.add_argument('--timeout', default=600000000, type=int, - help = 'timeout (in million seconds) of each mapper job, automatically set to a very long time,'\ - 'normally you do not need to set this ') + help = 'timeout (in million seconds) of each mapper job, automatically set to a very long time,'\ + 'normally you do not need to set 
this ') parser.add_argument('-m', '--memory_mb', default=1024, type=int, - help = 'maximum memory used by the process, Guide: set it large (near mapreduce.jobtracker.maxmapmemory.mb).'\ - 'if you are running multi-threading rabit,'\ - 'so that each node can occupy all the mapper slots in a machine for maximum performance') + help = 'maximum memory used by the process, Guide: set it large (near mapreduce.jobtracker.maxmapmemory.mb).'\ + 'if you are running multi-threading rabit,'\ + 'so that each node can occupy all the mapper slots in a machine for maximum performance') + + if hadoop_binary == None: parser.add_argument('-hb', '--hadoop_binary', required = True, help="path-to-hadoop binary folder") @@ -72,15 +74,14 @@ parser.add_argument('command', nargs='+', args = parser.parse_args() if args.jobname == 'auto': - args.jobname = ('Rabit[nworker=%d]:' % args.nworker) + args.command[0].split('/')[-1]; - - + args.jobname = ('Rabit[nworker=%d]:' % args.nworker) + args.command[0].split('/')[-1]; + def hadoop_streaming(nworker, slave_args): cmd = '%s jar %s -D mapreduce.job.maps=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) - cmd += ' -D mapreduce.job.name' % (args.jobname) + cmd += ' -D mapreduce.job.name=%s' % (args.jobname) + cmd += ' -D mapreduce.map.cpu.vcores=%d' % (args.nthread) cmd += ' -D mapreduce.task.timeout=%d' % (args.timeout) cmd += ' -D mapreduce.map.memory.mb=%d' % (args.memory_mb) - cmd += ' -D mapreduce.map.cpu.vcores=%d' % (args.nthread) cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) fset = set() From 78bfe867e6dec899e4e47544d986afe01c085e45 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 11 Jan 2015 11:13:02 -0800 Subject: [PATCH 194/531] unify hadoop and yarn script --- tracker/rabit_hadoop.py | 42 +++++++++++++---- tracker/rabit_yarn.py | 100 ++-------------------------------------- 2 files changed, 39 insertions(+), 103 deletions(-) 
mode change 100644 => 100755 tracker/rabit_yarn.py diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 3d5977e14..fdd949c2c 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -10,6 +10,7 @@ import time import subprocess import rabit_tracker as tracker + #!!! Set path to hadoop and hadoop streaming jar here hadoop_binary = 'hadoop' hadoop_streaming_jar = None @@ -28,9 +29,12 @@ if hadoop_binary == None or hadoop_streaming_jar == None: print 'Warning: Cannot auto-detect path to hadoop and hadoop-streaming jar, need to set them via arguments -hs and -hb' print '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14' -parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using Hadoop Streaming') +parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using Hadoop Streaming.'\ + 'This script support both Hadoop 1.0 and Yarn(MRv2), Yarn is recommended') parser.add_argument('-n', '--nworker', required=True, type=int, help = 'number of worker proccess to be launched') +parser.add_argument('-nt', '--nthread', default = -1, type=int, + help = 'number of thread in each mapper to be launched, set it if each rabit job is multi-threaded') parser.add_argument('-i', '--input', required=True, help = 'input path in HDFS') parser.add_argument('-o', '--output', required=True, @@ -51,7 +55,7 @@ parser.add_argument('--jobname', default='auto', help = 'customize jobname in tr parser.add_argument('--timeout', default=600000000, type=int, help = 'timeout (in million seconds) of each mapper job, automatically set to a very long time,'\ 'normally you do not need to set this ') -parser.add_argument('-m', '--memory_mb', default=-1, type=int, +parser.add_argument('-mem', '--memory_mb', default=-1, type=int, help = 'maximum memory used by the process, Guide: set it large (near mapred.cluster.max.map.memory.mb)'\ 'if you are running multi-threading rabit,'\ 'so 
that each node can occupy all the mapper slots in a machine for maximum performance') @@ -75,7 +79,7 @@ args = parser.parse_args() if args.jobname == 'auto': args.jobname = ('Rabit[nworker=%d]:' % args.nworker) + args.command[0].split('/')[-1]; -def hadoop_streaming(nworker, worker_args): +def hadoop_streaming(nworker, worker_args, yarn = False): fset = set() if args.auto_file_cache: for i in range(len(args.command)): @@ -86,10 +90,30 @@ def hadoop_streaming(nworker, worker_args): args.command[i] = './' + args.command[i].split('/')[-1] else: args.command[i] = args.command[i].split('/')[-1] - cmd = '%s jar %s -D mapred.map.tasks=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) - cmd += ' -Dmapred.job.name=%s' % (args.jobname) - cmd += ' -Dmapred.task.timeout=%d' % (args.timeout) - cmd += ' -Dmapred.job.map.memory.mb=%d' % (args.memory_mb) + kmap = {} + # setup keymaps + if yarn: + kmap['nworker'] = 'mapreduce.job.maps' + kmap['jobname'] = 'mapreduce.job.name' + kmap['nthread'] = 'mapreduce.map.cpu.vcores' + kmap['timeout'] = 'mapreduce.task.timeout' + kmap['memory_mb'] = 'mapreduce.map.memory.mb' + else: + kmap['nworker'] = 'mapred.map.tasks' + kmap['jobname'] = 'mapred.job.name' + kmap['nthread'] = None + kmap['timeout'] = 'mapred.task.timeout' + kmap['memory_mb'] = 'mapred.job.map.memory.mb' + cmd = '%s jar %s' % (args.hadoop_binary, args.hadoop_streaming_jar) + cmd += ' -D%s=%d' % (kmap['nworker'], nworker) + cmd += ' -D%s=%s' % (kmap['jobname'], args.jobname) + if args.nthread != -1: + assert kmap['nthread'] is not None, "nthread can only be set in Yarn cluster, it is highly recommended to " + cmd += ' -D%s=%d' % (kmap['ntread'], args.nthread) + cmd += ' -D%s=%d' % (kmap['timeout'], args.timeout) + if args.memory_mb != -1: + cmd += ' -D%s=%d' % (kmap['timeout'], args.timeout) + cmd += ' -input %s -output %s' % (args.input, args.output) cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + worker_args)) if args.files != None: @@ 
-101,4 +125,6 @@ def hadoop_streaming(nworker, worker_args): print cmd subprocess.check_call(cmd, shell = True) -tracker.submit(args.nworker, [], fun_submit = hadoop_streaming, verbose = args.verbose) +if __name__ == 'main': + fun_submit = lambda nworker, worker_args: hadoop_streaming(nworker, worker_args, False) + tracker.submit(args.nworker, [], fun_submit = fun_submit, verbose = args.verbose) diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py old mode 100644 new mode 100755 index 0b542c099..3bf231cae --- a/tracker/rabit_yarn.py +++ b/tracker/rabit_yarn.py @@ -1,100 +1,10 @@ #!/usr/bin/python """ -This is a script to submit rabit job using hadoop streaming +This is a script to submit rabit job using Yarn submit the rabit process as mappers of MapReduce """ -import argparse -import sys -import os -import time -import subprocess -import rabit_tracker as tracker +import rabit_hadoop -#!!! Set path to hadoop and hadoop streaming jar here -hadoop_binary = 'hadoop' -hadoop_streaming_jar = None - -# code -hadoop_home = os.getenv('HADOOP_HOME') -if hadoop_home != None: - if hadoop_binary == None: - hadoop_binary = hadoop_home + '/bin/hadoop' - assert os.path.exists(hadoop_binary), "HADDOP_HOME does not contain the hadoop binary" - if hadoop_streaming_jar == None: - hadoop_streaming_jar = hadoop_home + '/lib/hadoop-streaming.jar' - assert os.path.exists(hadoop_streaming_jar), "HADDOP_HOME does not contain the haddop streaming jar" - -if hadoop_binary == None or hadoop_streaming_jar == None: - print 'Warning: Cannot auto-detect path to hadoop and streaming jar, need to set them via arguments -hs and -hb' - print '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14' - -parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using Hadoop Streaming') -parser.add_argument('-nw', '--nworker', required=True, type=int, - help = 'number of worker proccess to be launched') 
-parser.add_argument('-nt', '--nthread', required=True, type=int, - help = 'number of thread of each mapper to be launched') -parser.add_argument('-i', '--input', required=True, - help = 'input path in HDFS') -parser.add_argument('-o', '--output', required=True, - help = 'output path in HDFS') -parser.add_argument('-v', '--verbose', default=0, choices=[0, 1], type=int, - help = 'print more messages into the console') -parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type=int, - help = 'whether automatically cache the files in the command to hadoop localfile, this is on by default') -parser.add_argument('-f', '--files', nargs = '*', - help = 'the cached file list in mapreduce,'\ - ' the submission script will automatically cache all the files which appears in command.'\ - ' You may need this option to cache additional files.'\ - ' You can also use it to manually cache files when auto_file_cache is off') -parser.add_argument('--jobname', default='auto', help = 'customize jobname in tracker') -parser.add_argument('--timeout', default=600000000, type=int, - help = 'timeout (in million seconds) of each mapper job, automatically set to a very long time,'\ - 'normally you do not need to set this ') -parser.add_argument('-m', '--memory_mb', default=1024, type=int, - help = 'maximum memory used by the process, Guide: set it large (near mapreduce.jobtracker.maxmapmemory.mb).'\ - 'if you are running multi-threading rabit,'\ - 'so that each node can occupy all the mapper slots in a machine for maximum performance') - - -if hadoop_binary == None: - parser.add_argument('-hb', '--hadoop_binary', required = True, - help="path-to-hadoop binary folder") -else: - parser.add_argument('-hb', '--hadoop_binary', default = hadoop_binary, - help="path-to-hadoop binary folder") - -if hadoop_streaming_jar == None: - parser.add_argument('-jar', '--hadoop_streaming_jar', required = True, - help='path-to hadoop streamimg jar file') -else: - parser.add_argument('-jar', 
'--hadoop_streaming_jar', default = hadoop_streaming_jar, - help='path-to hadoop streamimg jar file') -parser.add_argument('command', nargs='+', - help = 'command for rabit program') -args = parser.parse_args() - -if args.jobname == 'auto': - args.jobname = ('Rabit[nworker=%d]:' % args.nworker) + args.command[0].split('/')[-1]; - -def hadoop_streaming(nworker, slave_args): - cmd = '%s jar %s -D mapreduce.job.maps=%d' % (args.hadoop_binary, args.hadoop_streaming_jar, nworker) - cmd += ' -D mapreduce.job.name=%s' % (args.jobname) - cmd += ' -D mapreduce.map.cpu.vcores=%d' % (args.nthread) - cmd += ' -D mapreduce.task.timeout=%d' % (args.timeout) - cmd += ' -D mapreduce.map.memory.mb=%d' % (args.memory_mb) - cmd += ' -input %s -output %s' % (args.input, args.output) - cmd += ' -mapper \"%s\" -reducer \"/bin/cat\" ' % (' '.join(args.command + slave_args)) - fset = set() - if args.auto_file_cache: - for f in args.command: - if os.path.exists(f): - fset.add(f) - for flst in args.files: - for f in flst.split('#'): - fset.add(f) - for f in fset: - cmd += ' -file %s' % f - print cmd - subprocess.check_call(cmd, shell = True) - -tracker.submit(args.nworker, [], fun_submit = hadoop_streaming, verbose = args.verbose) +if __name__ == 'main': + fun_submit = lambda nworker, worker_args: hadoop_streaming(nworker, worker_args, True) + tracker.submit(args.nworker, [], fun_submit = fun_submit, verbose = args.verbose) From bfc3f61010d9b8acdc9b875f44501c90b2f42976 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 11 Jan 2015 11:15:12 -0800 Subject: [PATCH 195/531] minor --- tracker/rabit_hadoop.py | 5 +++-- tracker/rabit_yarn.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index fdd949c2c..8ecb2c04d 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -108,7 +108,8 @@ def hadoop_streaming(nworker, worker_args, yarn = False): cmd += ' -D%s=%d' % (kmap['nworker'], nworker) cmd += ' -D%s=%s' % 
(kmap['jobname'], args.jobname) if args.nthread != -1: - assert kmap['nthread'] is not None, "nthread can only be set in Yarn cluster, it is highly recommended to " + assert kmap['nthread'] is not None, 'nthread can only be set in Yarn(Hadoop 2.x) cluster'\ + 'it is recommended to use Yarn to submit rabit jobs' cmd += ' -D%s=%d' % (kmap['ntread'], args.nthread) cmd += ' -D%s=%d' % (kmap['timeout'], args.timeout) if args.memory_mb != -1: @@ -126,5 +127,5 @@ def hadoop_streaming(nworker, worker_args, yarn = False): subprocess.check_call(cmd, shell = True) if __name__ == 'main': - fun_submit = lambda nworker, worker_args: hadoop_streaming(nworker, worker_args, False) + fun_submit = lambda nworker, worker_args: hadoop_streaming(nworker, worker_args, yarn=False) tracker.submit(args.nworker, [], fun_submit = fun_submit, verbose = args.verbose) diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py index 3bf231cae..ed80595eb 100755 --- a/tracker/rabit_yarn.py +++ b/tracker/rabit_yarn.py @@ -6,5 +6,5 @@ submit the rabit process as mappers of MapReduce import rabit_hadoop if __name__ == 'main': - fun_submit = lambda nworker, worker_args: hadoop_streaming(nworker, worker_args, True) + fun_submit = lambda nworker, worker_args: hadoop_streaming(nworker, worker_args, yarn=True) tracker.submit(args.nworker, [], fun_submit = fun_submit, verbose = args.verbose) From db2ebf741061ab7b5180a751bd715692e26dce34 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 11 Jan 2015 11:46:12 -0800 Subject: [PATCH 196/531] use unified script, auto detect hadoop version --- tracker/rabit_hadoop.py | 32 ++++++++++++++++++++++---------- tracker/rabit_yarn.py | 10 ---------- 2 files changed, 22 insertions(+), 20 deletions(-) delete mode 100755 tracker/rabit_yarn.py diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 8ecb2c04d..ecd05ce1f 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -8,6 +8,7 @@ import sys import os import time import subprocess +import 
warnings import rabit_tracker as tracker @@ -26,8 +27,8 @@ if hadoop_home != None: assert os.path.exists(hadoop_streaming_jar), "HADDOP_HOME does not contain the haddop streaming jar" if hadoop_binary == None or hadoop_streaming_jar == None: - print 'Warning: Cannot auto-detect path to hadoop and hadoop-streaming jar, need to set them via arguments -hs and -hb' - print '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14' + warnings.warn('Warning: Cannot auto-detect path to hadoop and hadoop-streaming jar, need to set them via arguments -hs and -hb\n'\ + '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14') parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using Hadoop Streaming.'\ 'This script support both Hadoop 1.0 and Yarn(MRv2), Yarn is recommended') @@ -79,7 +80,17 @@ args = parser.parse_args() if args.jobname == 'auto': args.jobname = ('Rabit[nworker=%d]:' % args.nworker) + args.command[0].split('/')[-1]; -def hadoop_streaming(nworker, worker_args, yarn = False): +# detech hadoop version +(out, err) = subprocess.Popen('%s version' % args.hadoop_binary, shell = True, stdout=subprocess.PIPE).communicate() +out = out.split('\n')[0].split() +assert out[0] == 'Hadoop', 'cannot parse hadoop version string' +hadoop_version = out[1].split('.') +use_yarn = int(hadoop_version[0]) >= 2 + +if not use_yarn: + print 'Current Hadoop Version is %s' % out[1] + +def hadoop_streaming(nworker, worker_args, use_yarn): fset = set() if args.auto_file_cache: for i in range(len(args.command)): @@ -92,7 +103,7 @@ def hadoop_streaming(nworker, worker_args, yarn = False): args.command[i] = args.command[i].split('/')[-1] kmap = {} # setup keymaps - if yarn: + if use_yarn: kmap['nworker'] = 'mapreduce.job.maps' kmap['jobname'] = 'mapreduce.job.name' kmap['nthread'] = 'mapreduce.map.cpu.vcores' @@ -108,9 +119,11 @@ def hadoop_streaming(nworker, 
worker_args, yarn = False): cmd += ' -D%s=%d' % (kmap['nworker'], nworker) cmd += ' -D%s=%s' % (kmap['jobname'], args.jobname) if args.nthread != -1: - assert kmap['nthread'] is not None, 'nthread can only be set in Yarn(Hadoop 2.x) cluster'\ - 'it is recommended to use Yarn to submit rabit jobs' - cmd += ' -D%s=%d' % (kmap['ntread'], args.nthread) + if kmap['nthread'] is None: + warnings.warn('nthread can only be set in Yarn(Hadoop version greater than 2.0),'\ + 'it is recommended to use Yarn to submit rabit jobs') + else: + cmd += ' -D%s=%d' % (kmap['nthread'], args.nthread) cmd += ' -D%s=%d' % (kmap['timeout'], args.timeout) if args.memory_mb != -1: cmd += ' -D%s=%d' % (kmap['timeout'], args.timeout) @@ -126,6 +139,5 @@ def hadoop_streaming(nworker, worker_args, yarn = False): print cmd subprocess.check_call(cmd, shell = True) -if __name__ == 'main': - fun_submit = lambda nworker, worker_args: hadoop_streaming(nworker, worker_args, yarn=False) - tracker.submit(args.nworker, [], fun_submit = fun_submit, verbose = args.verbose) +fun_submit = lambda nworker, worker_args: hadoop_streaming(nworker, worker_args, int(hadoop_version[0]) >= 2) +tracker.submit(args.nworker, [], fun_submit = fun_submit, verbose = args.verbose) diff --git a/tracker/rabit_yarn.py b/tracker/rabit_yarn.py deleted file mode 100755 index ed80595eb..000000000 --- a/tracker/rabit_yarn.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/python -""" -This is a script to submit rabit job using Yarn -submit the rabit process as mappers of MapReduce -""" -import rabit_hadoop - -if __name__ == 'main': - fun_submit = lambda nworker, worker_args: hadoop_streaming(nworker, worker_args, yarn=True) - tracker.submit(args.nworker, [], fun_submit = fun_submit, verbose = args.verbose) From 5146409a1d10e6acc91f03a63ef714b951bece34 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 11 Jan 2015 11:47:37 -0800 Subject: [PATCH 197/531] simpler --- tracker/rabit_hadoop.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 
deletions(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index ecd05ce1f..bab35782f 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -27,8 +27,10 @@ if hadoop_home != None: assert os.path.exists(hadoop_streaming_jar), "HADDOP_HOME does not contain the haddop streaming jar" if hadoop_binary == None or hadoop_streaming_jar == None: - warnings.warn('Warning: Cannot auto-detect path to hadoop and hadoop-streaming jar, need to set them via arguments -hs and -hb\n'\ - '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME or modify rabit_hadoop.py line 14') + warnings.warn('Warning: Cannot auto-detect path to hadoop and hadoop-streaming jar\n'\ + '\tneed to set them via arguments -hs and -hb\n'\ + '\tTo enable auto-detection, you can set enviroment variable HADOOP_HOME'\ + ', or modify rabit_hadoop.py line 14') parser = argparse.ArgumentParser(description='Rabit script to submit rabit jobs using Hadoop Streaming.'\ 'This script support both Hadoop 1.0 and Yarn(MRv2), Yarn is recommended') From a120edc56e7c998abb3444d366fcb4dd599a9a5e Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 11 Jan 2015 11:48:08 -0800 Subject: [PATCH 198/531] shorter --- tracker/rabit_hadoop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index bab35782f..0d2850dcc 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -48,7 +48,7 @@ parser.add_argument('-ac', '--auto_file_cache', default=1, choices=[0, 1], type= help = 'whether automatically cache the files in the command to hadoop localfile, this is on by default') parser.add_argument('-f', '--files', default = [], action='append', help = 'the cached file list in mapreduce,'\ - ' the submission script will automatically cache all the files which appears in command to local folder'\ + ' the submission script will automatically cache all the files which appears in command'\ ' This will also cause rewritten 
of all the file names in the command to current path,'\ ' for example `../../kmeans ../kmeans.conf` will be rewritten to `./kmeans kmeans.conf`'\ ' because the two files are cached to running folder.'\ From 6b651176a3e8dcdec618bddc84442142c4d01881 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 11 Jan 2015 21:28:13 -0800 Subject: [PATCH 199/531] yarn is part of hadoop script --- guide/README.md | 5 +---- tracker/rabit_hadoop.py | 3 +-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/guide/README.md b/guide/README.md index a2ef94160..8d3aa1b29 100644 --- a/guide/README.md +++ b/guide/README.md @@ -197,13 +197,10 @@ Rabit is a portable library that can run on multiple platforms. * You can use [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) to run rabit programs on hadoop * This will start n rabit programs as mappers of MapReduce * Each program can read its portion of data from stdin -* Yarn is highly recommended, since Yarn allows specifying number of cpus and memory of each mapper: +* Yarn(Hadoop 2.0 or higher) is highly recommended, since Yarn allows specifying number of cpus and memory of each mapper: - This allows multi-threading programs in each node, which can be more efficient - An easy multi-threading solution could be to use OpenMP with rabit code -#### Running Rabit on Yarn -* To Be modified from [../tracker/rabit_hadoop.py](../tracker/rabit_hadoop.py) - #### Running Rabit using MPI * You can submit rabit programs to an MPI cluster using [../tracker/rabit_mpi.py](../tracker/rabit_mpi.py). 
* If you linked your code against librabit_mpi.a, then you can directly use mpirun to submit the job diff --git a/tracker/rabit_hadoop.py b/tracker/rabit_hadoop.py index 0d2850dcc..2883b1c47 100755 --- a/tracker/rabit_hadoop.py +++ b/tracker/rabit_hadoop.py @@ -89,8 +89,7 @@ assert out[0] == 'Hadoop', 'cannot parse hadoop version string' hadoop_version = out[1].split('.') use_yarn = int(hadoop_version[0]) >= 2 -if not use_yarn: - print 'Current Hadoop Version is %s' % out[1] +print 'Current Hadoop Version is %s' % out[1] def hadoop_streaming(nworker, worker_args, use_yarn): fset = set() From 30f3971bee701e8713944690443dd1f9caa4cc36 Mon Sep 17 00:00:00 2001 From: nachocano Date: Mon, 12 Jan 2015 00:14:40 -0800 Subject: [PATCH 200/531] adding more description to toolkit --- guide/README.md | 2 +- toolkit/README.md | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/guide/README.md b/guide/README.md index 8d3aa1b29..0f25eae23 100644 --- a/guide/README.md +++ b/guide/README.md @@ -112,7 +112,7 @@ int main(int argc, char *argv[]) { if (version == 0) model.InitModel(); // the version number marks the iteration to resume for (int iter = version; iter < max_iter; ++iter) { - // model should be sufficient variable at this point + // at this point, the model object should allow us to recover the program state ... // each iteration can contain multiple calls of allreduce/broadcast rabit::Allreduce(&data[0], n); diff --git a/toolkit/README.md b/toolkit/README.md index 5a3845465..c1c4eb693 100644 --- a/toolkit/README.md +++ b/toolkit/README.md @@ -1,8 +1,36 @@ Toolkit ==== -This folder contains example toolkit developed using rabit +This folder contains some example toolkits developed with rabit to help you get started. KMeans ==== -* Kmeans taks in LIBSVM format -* You will need a dummy label field at beginning of all the lines to get KMeans + +#### How to run it +You will need to build the program with ```make```. 
+If you want to run it with Hadoop, you can execute the [./kmeans_hadoop.sh](./kmeans_hadoop.sh) script from your master node in cluster. +You will have to edit the file in order to specify the path to the Hadoop Streaming jar. Afterwards, you can execute it with the following arguments (in the exact same order): + +1) number of worker nodes in your Hadoop cluster (i.e. number of slaves) +2) path to the input data (HDFS path where you put the data) +3) number of clusters K +4) number of iterations to perform +5) output path (HDFS path where to store the output data, must be new) + +If you take a look at [./kmeans_hadoop.sh](./kmeans_hadoop.sh), you can see that it runs the kmeans.rabit version. If you want to run the program backed by the mock, you will need to update it accordingly, i.e. use kmeans.mock instead. + +The current implementation runs for the amount of iterations you specify in the command line argument. If you would like to add some convergence criteria (e.g. when no cluster assignment changes between iterations you stop or something like that) you will have to modify [./kmeans.cc](./kmeans.cc). We leave that as an exercise to the reader :) + +#### Input File Format +KMeans uses LIBSVM format to parse the input. If you are not familiar with LIBSVM, [http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/](here) you will find more details. + +The format is the following: + +