change in socket, to pass out error code

This commit is contained in:
tqchen 2014-11-27 16:17:07 -08:00
parent c565104491
commit 2f1ba40786
2 changed files with 173 additions and 118 deletions

View File

@ -76,7 +76,7 @@ class SyncManager : public IEngine {
} }
// initialize the manager // initialize the manager
inline void Init(void) { inline void Init(void) {
utils::TCPSocket::Startup(); utils::Socket::Startup();
// single node mode // single node mode
if (master_uri == "NULL") return; if (master_uri == "NULL") return;
utils::Assert(links.size() == 0, "can only call Init once"); utils::Assert(links.size() == 0, "can only call Init once");
@ -86,7 +86,9 @@ class SyncManager : public IEngine {
// get information from master // get information from master
utils::TCPSocket master; utils::TCPSocket master;
master.Create(); master.Create();
master.Connect(utils::SockAddr(master_uri.c_str(), master_port)); if (!master.Connect(utils::SockAddr(master_uri.c_str(), master_port))) {
utils::Socket::Error("Connect");
}
utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 1"); utils::Assert(master.SendAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 1");
utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 2"); utils::Assert(master.RecvAll(&magic, sizeof(magic)) == sizeof(magic), "sync::Init failure 2");
utils::Check(magic == kMagic, "sync::Invalid master message, init failure"); utils::Check(magic == kMagic, "sync::Invalid master message, init failure");
@ -213,7 +215,9 @@ class SyncManager : public IEngine {
// read data from childs // read data from childs
for (int i = 0; i < nlink; ++i) { for (int i = 0; i < nlink; ++i) {
if (i != parent_index && selecter.CheckRead(links[i].sock)) { if (i != parent_index && selecter.CheckRead(links[i].sock)) {
links[i].ReadToRingBuffer(size_up_out); if (!links[i].ReadToRingBuffer(size_up_out)) {
utils::Socket::Error("Recv");
}
} }
} }
// this node have childs, peform reduce // this node have childs, peform reduce
@ -252,15 +256,25 @@ class SyncManager : public IEngine {
} }
if (parent_index != -1) { if (parent_index != -1) {
// pass message up to parent, can pass data that are already been reduced // pass message up to parent, can pass data that are already been reduced
if (selecter.CheckWrite(links[parent_index].sock)) { if (selecter.CheckWrite(links[parent_index].sock)) {
size_up_out += links[parent_index].sock. ssize_t len = links[parent_index].sock.
Send(sendrecvbuf + size_up_out, size_up_reduce - size_up_out); Send(sendrecvbuf + size_up_out, size_up_reduce - size_up_out);
if (len != -1) {
size_up_out += static_cast<size_t>(len);
} else {
if (errno != EAGAIN && errno != EWOULDBLOCK) utils::Socket::Error("Recv");
}
} }
// read data from parent // read data from parent
if (selecter.CheckRead(links[parent_index].sock)) { if (selecter.CheckRead(links[parent_index].sock)) {
size_down_in += links[parent_index].sock. ssize_t len = links[parent_index].sock.
Recv(sendrecvbuf + size_down_in, total_size - size_down_in); Recv(sendrecvbuf + size_down_in, total_size - size_down_in);
utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error"); if (len != -1) {
size_down_in += static_cast<size_t>(len);
utils::Assert(size_down_in <= size_up_out, "AllReduce: boundary error");
} else {
if (errno != EAGAIN && errno != EWOULDBLOCK) utils::Socket::Error("Recv");
}
} }
} else { } else {
// this is root, can use reduce as most recent point // this is root, can use reduce as most recent point
@ -272,7 +286,9 @@ class SyncManager : public IEngine {
for (int i = 0; i < nlink; ++i) { for (int i = 0; i < nlink; ++i) {
if (i != parent_index) { if (i != parent_index) {
if (selecter.CheckWrite(links[i].sock)) { if (selecter.CheckWrite(links[i].sock)) {
links[i].WriteFromArray(sendrecvbuf, size_down_in); if (!links[i].WriteFromArray(sendrecvbuf, size_down_in)) {
utils::Socket::Error("Send");
}
} }
nfinished = std::min(links[i].size_write, nfinished); nfinished = std::min(links[i].size_write, nfinished);
} }
@ -317,7 +333,9 @@ class SyncManager : public IEngine {
// probe in-link // probe in-link
for (int i = 0; i < nlink; ++i) { for (int i = 0; i < nlink; ++i) {
if (selecter.CheckRead(links[i].sock)) { if (selecter.CheckRead(links[i].sock)) {
links[i].ReadToArray(sendrecvbuf_, total_size); if (!links[i].ReadToArray(sendrecvbuf_, total_size)) {
utils::Socket::Error("Recv");
}
size_in = links[i].size_read; size_in = links[i].size_read;
if (size_in != 0) { if (size_in != 0) {
in_link = i; break; in_link = i; break;
@ -327,7 +345,9 @@ class SyncManager : public IEngine {
} else { } else {
// read from in link // read from in link
if (in_link >= 0 && selecter.CheckRead(links[in_link].sock)) { if (in_link >= 0 && selecter.CheckRead(links[in_link].sock)) {
links[in_link].ReadToArray(sendrecvbuf_, total_size); if(!links[in_link].ReadToArray(sendrecvbuf_, total_size)) {
utils::Socket::Error("Recv");
}
size_in = links[in_link].size_read; size_in = links[in_link].size_read;
} }
} }
@ -336,7 +356,9 @@ class SyncManager : public IEngine {
for (int i = 0; i < nlink; ++i) { for (int i = 0; i < nlink; ++i) {
if (i != in_link) { if (i != in_link) {
if (selecter.CheckWrite(links[i].sock)) { if (selecter.CheckWrite(links[i].sock)) {
links[i].WriteFromArray(sendrecvbuf_, size_in); if (!links[i].WriteFromArray(sendrecvbuf_, size_in)) {
utils::Socket::Error("Send");
}
} }
nfinished = std::min(nfinished, links[i].size_write); nfinished = std::min(nfinished, links[i].size_write);
} }
@ -390,32 +412,44 @@ class SyncManager : public IEngine {
* position after protect_start * position after protect_start
* \param protect_start all data start from protect_start is still needed in buffer * \param protect_start all data start from protect_start is still needed in buffer
* read shall not override this * read shall not override this
* \return true if it is an successful read, false if there is some error happens, check errno
*/ */
inline void ReadToRingBuffer(size_t protect_start) { inline bool ReadToRingBuffer(size_t protect_start) {
size_t ngap = size_read - protect_start; size_t ngap = size_read - protect_start;
utils::Assert(ngap <= buffer_size, "AllReduce: boundary check"); utils::Assert(ngap <= buffer_size, "AllReduce: boundary check");
size_t offset = size_read % buffer_size; size_t offset = size_read % buffer_size;
size_t nmax = std::min(buffer_size - ngap, buffer_size - offset); size_t nmax = std::min(buffer_size - ngap, buffer_size - offset);
size_read += sock.Recv(buffer_head + offset, nmax); ssize_t len = sock.Recv(buffer_head + offset, nmax);
if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK;
size_read += static_cast<size_t>(len);
return true;
} }
/*! /*!
* \brief read data into array, * \brief read data into array,
* this function can not be used together with ReadToRingBuffer * this function can not be used together with ReadToRingBuffer
* a link can either read into the ring buffer, or existing array * a link can either read into the ring buffer, or existing array
* \param max_size maximum size of array * \param max_size maximum size of array
* \return true if it is an successful read, false if there is some error happens, check errno
*/ */
inline void ReadToArray(void *recvbuf_, size_t max_size) { inline bool ReadToArray(void *recvbuf_, size_t max_size) {
char *p = static_cast<char*>(recvbuf_); char *p = static_cast<char*>(recvbuf_);
size_read += sock.Recv(p + size_read, max_size - size_read); ssize_t len = sock.Recv(p + size_read, max_size - size_read);
if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK;
size_read += static_cast<size_t>(len);
return true;
} }
/*! /*!
* \brief write data in array to sock * \brief write data in array to sock
* \param sendbuf_ head of array * \param sendbuf_ head of array
* \param max_size maximum size of array * \param max_size maximum size of array
* \return true if it is an successful write, false if there is some error happens, check errno
*/ */
inline void WriteFromArray(const void *sendbuf_, size_t max_size) { inline bool WriteFromArray(const void *sendbuf_, size_t max_size) {
const char *p = static_cast<const char*>(sendbuf_); const char *p = static_cast<const char*>(sendbuf_);
size_write += sock.Send(p + size_write, max_size - size_write); ssize_t len = sock.Send(p + size_write, max_size - size_write);
if (len == -1) return errno == EAGAIN || errno == EWOULDBLOCK;
size_write += static_cast<size_t>(len);
return true;
} }
private: private:

View File

@ -22,7 +22,6 @@
#include <cstring> #include <cstring>
#include "./utils.h" #include "./utils.h"
namespace utils {
#if defined(_WIN32) #if defined(_WIN32)
typedef int ssize_t; typedef int ssize_t;
typedef int sock_size_t; typedef int sock_size_t;
@ -32,6 +31,7 @@ typedef size_t sock_size_t;
const int INVALID_SOCKET = -1; const int INVALID_SOCKET = -1;
#endif #endif
namespace utils {
/*! \brief data structure for network address */ /*! \brief data structure for network address */
struct SockAddr { struct SockAddr {
sockaddr_in addr; sockaddr_in addr;
@ -74,36 +74,18 @@ struct SockAddr {
return std::string(s); return std::string(s);
} }
}; };
/*! /*!
* \brief a wrapper of TCP socket that hopefully be cross platform * \brief base class containing common operations of TCP and UDP sockets
*/ */
class TCPSocket { class Socket {
public: public:
/*! \brief the file descriptor of socket */ /*! \brief the file descriptor of socket */
SOCKET sockfd; SOCKET sockfd;
// constructor
TCPSocket(void) : sockfd(INVALID_SOCKET) {
}
explicit TCPSocket(SOCKET sockfd) : sockfd(sockfd) {
}
~TCPSocket(void) {
// do nothing in destructor
// user need to take care of close
}
// default conversion to int // default conversion to int
inline operator SOCKET() const { inline operator SOCKET() const {
return sockfd; return sockfd;
} }
/*!
* \brief create the socket, call this before using socket
* \param af domain
*/
inline void Create(int af = PF_INET) {
sockfd = socket(PF_INET, SOCK_STREAM, 0);
if (sockfd == INVALID_SOCKET) {
SockError("Create");
}
}
/*! /*!
* \brief start up the socket module * \brief start up the socket module
* call this before using the sockets * call this before using the sockets
@ -112,7 +94,7 @@ class TCPSocket {
#ifdef _WIN32 #ifdef _WIN32
WSADATA wsa_data; WSADATA wsa_data;
if (WSAStartup(MAKEWORD(2, 2), &wsa_data) != -1) { if (WSAStartup(MAKEWORD(2, 2), &wsa_data) != -1) {
SockError("Startup"); Socket::Error("Startup");
} }
if (LOBYTE(wsa_data.wVersion) != 2 || HIBYTE(wsa_data.wVersion) != 2) { if (LOBYTE(wsa_data.wVersion) != 2 || HIBYTE(wsa_data.wVersion) != 2) {
WSACleanup(); WSACleanup();
@ -137,12 +119,12 @@ class TCPSocket {
#ifdef _WIN32 #ifdef _WIN32
u_long mode = non_block ? 1 : 0; u_long mode = non_block ? 1 : 0;
if (ioctlsocket(sockfd, FIONBIO, &mode) != NO_ERROR) { if (ioctlsocket(sockfd, FIONBIO, &mode) != NO_ERROR) {
SockError("SetNonBlock"); Socket::Error("SetNonBlock");
} }
#else #else
int flag = fcntl(sockfd, F_GETFL, 0); int flag = fcntl(sockfd, F_GETFL, 0);
if (flag == -1) { if (flag == -1) {
SockError("SetNonBlock-1"); Socket::Error("SetNonBlock-1");
} }
if (non_block) { if (non_block) {
flag |= O_NONBLOCK; flag |= O_NONBLOCK;
@ -150,10 +132,81 @@ class TCPSocket {
flag &= ~O_NONBLOCK; flag &= ~O_NONBLOCK;
} }
if (fcntl(sockfd, F_SETFL, flag) == -1) { if (fcntl(sockfd, F_SETFL, flag) == -1) {
SockError("SetNonBlock-2"); Socket::Error("SetNonBlock-2");
} }
#endif #endif
} }
/*!
* \brief bind the socket to an address
* \param addr
*/
inline void Bind(const SockAddr &addr) {
if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == -1) {
Socket::Error("Bind");
}
}
/*!
* \brief try bind the socket to host, from start_port to end_port
* \param start_port starting port number to try
* \param end_port ending port number to try
* \return the port successfully bind to, return -1 if failed to bind any port
*/
inline int TryBindHost(int start_port, int end_port) {
for (int port = start_port; port < end_port; ++port) {
SockAddr addr("0.0.0.0", port);
if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == 0) {
return port;
}
if (errno != EADDRINUSE) {
Socket::Error("TryBindHost");
}
}
return -1;
}
/*! \brief close the socket */
inline void Close(void) {
if (sockfd != INVALID_SOCKET) {
#ifdef _WIN32
closesocket(sockfd);
#else
close(sockfd);
#endif
sockfd = INVALID_SOCKET;
} else {
Error("Socket::Close double close the socket or close without create");
}
}
// report an socket error
inline static void Error(const char *msg) {
int errsv = errno;
utils::Error("Socket %s Error:%s", msg, strerror(errsv));
}
protected:
explicit Socket(SOCKET sockfd) : sockfd(sockfd) {
}
};
/*!
* \brief a wrapper of TCP socket that hopefully be cross platform
*/
class TCPSocket : public Socket{
public:
// constructor
TCPSocket(void) : Socket(INVALID_SOCKET) {
}
explicit TCPSocket(SOCKET sockfd) : Socket(sockfd) {
}
/*!
* \brief create the socket, call this before using socket
* \param af domain
*/
inline void Create(int af = PF_INET) {
sockfd = socket(PF_INET, SOCK_STREAM, 0);
if (sockfd == INVALID_SOCKET) {
Socket::Error("Create");
}
}
/*! /*!
* \brief perform listen of the socket * \brief perform listen of the socket
* \param backlog backlog parameter * \param backlog backlog parameter
@ -165,93 +218,43 @@ class TCPSocket {
TCPSocket Accept(void) { TCPSocket Accept(void) {
SOCKET newfd = accept(sockfd, NULL, NULL); SOCKET newfd = accept(sockfd, NULL, NULL);
if (newfd == INVALID_SOCKET) { if (newfd == INVALID_SOCKET) {
SockError("Accept"); Socket::Error("Accept");
} }
return TCPSocket(newfd); return TCPSocket(newfd);
} }
/*!
* \brief bind the socket to an address
* \param addr
*/
inline void Bind(const SockAddr &addr) {
if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == -1) {
SockError("Bind");
}
}
/*!
* \brief try bind the socket to host, from start_port to end_port
* \param start_port starting port number to try
* \param end_port ending port number to try
* \param out_addr the binding address, if successful
* \return whether the binding is successful
*/
inline int TryBindHost(int start_port, int end_port) {
for (int port = start_port; port < end_port; ++port) {
SockAddr addr("0.0.0.0", port);
if (bind(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == 0) {
return port;
}
if (errno != EADDRINUSE) {
SockError("TryBindHost");
}
}
return -1;
}
/*! /*!
* \brief connect to an address * \brief connect to an address
* \param addr the address to connect to * \param addr the address to connect to
* \return whether connect is successful
*/ */
inline void Connect(const SockAddr &addr) { inline bool Connect(const SockAddr &addr) {
if (connect(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == -1) { return connect(sockfd, (sockaddr*)&addr.addr, sizeof(addr.addr)) == 0;
SockError("Connect");
}
}
/*! \brief close the connection */
inline void Close(void) {
if (sockfd != -1) {
#ifdef _WIN32
closesocket(sockfd);
#else
close(sockfd);
#endif
sockfd = INVALID_SOCKET;
} else {
Error("TCPSocket::Close double close the socket or close without create");
}
} }
/*! /*!
* \brief send data using the socket * \brief send data using the socket
* \param buf the pointer to the buffer * \param buf the pointer to the buffer
* \param len the size of the buffer * \param len the size of the buffer
* \param flags extra flags * \param flags extra flags
* \return size of data actually sent * \return size of data actually sent
* return -1 if error occurs
*/ */
inline size_t Send(const void *buf_, size_t len, int flag = 0) { inline ssize_t Send(const void *buf_, size_t len, int flag = 0) {
const char *buf = reinterpret_cast<const char*>(buf_); const char *buf = reinterpret_cast<const char*>(buf_);
if (len == 0) return 0; if (len == 0) return 0;
ssize_t ret = send(sockfd, buf, static_cast<sock_size_t>(len), flag); return send(sockfd, buf, static_cast<sock_size_t>(len), flag);
if (ret == -1) { }
if (errno == EAGAIN || errno == EWOULDBLOCK) return 0;
SockError("Send");
}
return ret;
}
/*! /*!
* \brief receive data using the socket * \brief receive data using the socket
* \param buf_ the pointer to the buffer * \param buf_ the pointer to the buffer
* \param len the size of the buffer * \param len the size of the buffer
* \param flags extra flags * \param flags extra flags
* \return size of data actually received * \return size of data actually received
* return -1 if error occurs
*/ */
inline size_t Recv(void *buf_, size_t len, int flags = 0) { inline ssize_t Recv(void *buf_, size_t len, int flags = 0) {
char *buf = reinterpret_cast<char*>(buf_); char *buf = reinterpret_cast<char*>(buf_);
if (len == 0) return 0; if (len == 0) return 0;
ssize_t ret = recv(sockfd, buf, static_cast<sock_size_t>(len), flags); return recv(sockfd, buf, static_cast<sock_size_t>(len), flags);
if (ret == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) return 0;
SockError("Recv");
}
return ret;
} }
/*! /*!
* \brief peform block write that will attempt to send all data out * \brief peform block write that will attempt to send all data out
@ -267,7 +270,7 @@ class TCPSocket {
ssize_t ret = send(sockfd, buf, static_cast<ssize_t>(len - ndone), 0); ssize_t ret = send(sockfd, buf, static_cast<ssize_t>(len - ndone), 0);
if (ret == -1) { if (ret == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) return ndone; if (errno == EAGAIN || errno == EWOULDBLOCK) return ndone;
SockError("Recv"); Socket::Error("SendAll");
} }
buf += ret; buf += ret;
ndone += ret; ndone += ret;
@ -288,7 +291,7 @@ class TCPSocket {
ssize_t ret = recv(sockfd, buf, static_cast<sock_size_t>(len - ndone), MSG_WAITALL); ssize_t ret = recv(sockfd, buf, static_cast<sock_size_t>(len - ndone), MSG_WAITALL);
if (ret == -1) { if (ret == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) return ndone; if (errno == EAGAIN || errno == EWOULDBLOCK) return ndone;
SockError("Recv"); Socket::Error("RecvAll");
} }
if (ret == 0) return ndone; if (ret == 0) return ndone;
buf += ret; buf += ret;
@ -298,12 +301,8 @@ class TCPSocket {
} }
private: private:
// report an socket error
inline static void SockError(const char *msg) {
int errsv = errno;
Error("Socket %s Error:%s", msg, strerror(errsv));
}
}; };
/*! \brief helper data structure to perform select */ /*! \brief helper data structure to perform select */
struct SelectHelper { struct SelectHelper {
public: public:
@ -326,6 +325,14 @@ struct SelectHelper {
write_fds.push_back(fd); write_fds.push_back(fd);
if (fd > maxfd) maxfd = fd; if (fd > maxfd) maxfd = fd;
} }
/*!
* \brief add file descriptor to watch for exception
* \param fd file descriptor to be watched
*/
inline void WatchException(SOCKET fd) {
except_fds.push_back(fd);
if (fd > maxfd) maxfd = fd;
}
/*! /*!
* \brief Check if the descriptor is ready for read * \brief Check if the descriptor is ready for read
* \param fd file descriptor to check status * \param fd file descriptor to check status
@ -340,12 +347,20 @@ struct SelectHelper {
inline bool CheckWrite(SOCKET fd) const { inline bool CheckWrite(SOCKET fd) const {
return FD_ISSET(fd, &write_set) != 0; return FD_ISSET(fd, &write_set) != 0;
} }
/*!
* \brief Check if the descriptor has any exception
* \param fd file descriptor to check status
*/
inline bool CheckExcept(SOCKET fd) const {
return FD_ISSET(fd, &except_set) != 0;
}
/*! /*!
* \brief clear all the monitored descriptors * \brief clear all the monitored descriptors
*/ */
inline void Clear(void) { inline void Clear(void) {
read_fds.clear(); read_fds.clear();
write_fds.clear(); write_fds.clear();
except_fds.clear();
maxfd = 0; maxfd = 0;
} }
/*! /*!
@ -356,20 +371,26 @@ struct SelectHelper {
inline int Select(long timeout = 0) { inline int Select(long timeout = 0) {
FD_ZERO(&read_set); FD_ZERO(&read_set);
FD_ZERO(&write_set); FD_ZERO(&write_set);
FD_ZERO(&except_set);
for (size_t i = 0; i < read_fds.size(); ++i) { for (size_t i = 0; i < read_fds.size(); ++i) {
FD_SET(read_fds[i], &read_set); FD_SET(read_fds[i], &read_set);
} }
for (size_t i = 0; i < write_fds.size(); ++i) { for (size_t i = 0; i < write_fds.size(); ++i) {
FD_SET(write_fds[i], &write_set); FD_SET(write_fds[i], &write_set);
} }
for (size_t i = 0; i < except_fds.size(); ++i) {
FD_SET(except_fds[i], &except_set);
}
int ret; int ret;
if (timeout == 0) { if (timeout == 0) {
ret = select(static_cast<int>(maxfd + 1), &read_set, &write_set, NULL, NULL); ret = select(static_cast<int>(maxfd + 1), &read_set,
&write_set, &except_set, NULL);
} else { } else {
timeval tm; timeval tm;
tm.tv_usec = (timeout % 1000) * 1000; tm.tv_usec = (timeout % 1000) * 1000;
tm.tv_sec = timeout / 1000; tm.tv_sec = timeout / 1000;
ret = select(static_cast<int>(maxfd + 1), &read_set, &write_set, NULL, &tm); ret = select(static_cast<int>(maxfd + 1), &read_set,
&write_set, &except_set, &tm);
} }
if (ret == -1) { if (ret == -1) {
int errsv = errno; int errsv = errno;
@ -380,8 +401,8 @@ struct SelectHelper {
private: private:
SOCKET maxfd; SOCKET maxfd;
fd_set read_set, write_set; fd_set read_set, write_set, except_set;
std::vector<SOCKET> read_fds, write_fds; std::vector<SOCKET> read_fds, write_fds, except_fds;
}; };
} }
#endif #endif