[coll] Reduce the number of open files (sockets). (#10693)

Reduce the chance of hitting the error ``Failed to call `socket`: Too many open files``.
This commit is contained in:
Jiaming Yuan
2024-08-13 05:23:49 +08:00
committed by GitHub
parent d414fdf2e7
commit 43704549a2
3 changed files with 23 additions and 7 deletions

View File

@@ -141,7 +141,7 @@ Result ConnectTrackerImpl(proto::PeerInfo info, std::chrono::seconds timeout, st
for (std::int32_t r = (comm.Rank() + 1); r < comm.World(); ++r) {
auto const& peer = peers[r];
std::shared_ptr<TCPSocket> worker{TCPSocket::CreatePtr(comm.Domain())};
auto worker = std::make_shared<TCPSocket>();
rc = std::move(rc)
<< [&] { return Connect(peer.host, peer.port, retry, timeout, worker.get()); }
<< [&] { return worker->RecvTimeout(timeout); };
@@ -161,7 +161,7 @@ Result ConnectTrackerImpl(proto::PeerInfo info, std::chrono::seconds timeout, st
}
for (std::int32_t r = 0; r < comm.Rank(); ++r) {
auto peer = std::shared_ptr<TCPSocket>(TCPSocket::CreatePtr(comm.Domain()));
auto peer = std::make_shared<TCPSocket>();
rc = std::move(rc) << [&] {
SockAddress addr;
return listener->Accept(peer.get(), &addr);

View File

@@ -118,7 +118,9 @@ std::size_t TCPSocket::Send(StringView str) {
addr_len = sizeof(addr.V6().Handle());
}
conn = TCPSocket::Create(addr.Domain());
if (conn.IsClosed()) {
conn = TCPSocket::Create(addr.Domain());
}
CHECK_EQ(static_cast<std::int32_t>(conn.Domain()), static_cast<std::int32_t>(addr.Domain()));
auto non_blocking = conn.NonBlocking();
auto rc = conn.NonBlocking(true);