[rabit] Improved connection handling. (#9531)

- Enable timeout.
- Report connection error from the system.
- Handle retry for both tracker connection and peer connection.
This commit is contained in:
Jiaming Yuan
2023-08-30 13:00:04 +08:00
committed by GitHub
parent 2462e22cd4
commit ccfc90e4c6
10 changed files with 463 additions and 130 deletions

View File

@@ -1,5 +1,5 @@
/*!
* Copyright (c) 2022 by XGBoost Contributors
/**
* Copyright 2022-2023 by XGBoost Contributors
*/
#include <gtest/gtest.h>
#include <xgboost/collective/socket.h>
@@ -10,8 +10,7 @@
#include "../helpers.h"
namespace xgboost {
namespace collective {
namespace xgboost::collective {
TEST(Socket, Basic) {
system::SocketStartup();
@@ -31,15 +30,16 @@ TEST(Socket, Basic) {
TCPSocket client;
if (domain == SockDomain::kV4) {
auto const& addr = SockAddrV4::Loopback().Addr();
ASSERT_EQ(Connect(MakeSockAddress(StringView{addr}, port), &client), std::errc{});
auto rc = Connect(StringView{addr}, port, 1, std::chrono::seconds{3}, &client);
ASSERT_TRUE(rc.OK()) << rc.Report();
} else {
auto const& addr = SockAddrV6::Loopback().Addr();
auto rc = Connect(MakeSockAddress(StringView{addr}, port), &client);
auto rc = Connect(StringView{addr}, port, 1, std::chrono::seconds{3}, &client);
// Some environments (e.g. Docker) have a restricted network configuration.
if (rc == std::error_code{EADDRNOTAVAIL, std::system_category()}) {
if (!rc.OK() && rc.Code() == std::error_code{EADDRNOTAVAIL, std::system_category()}) {
GTEST_SKIP_(msg.c_str());
}
ASSERT_EQ(rc, std::errc{});
ASSERT_EQ(rc, Success()) << rc.Report();
}
ASSERT_EQ(client.Domain(), domain);
@@ -73,5 +73,4 @@ TEST(Socket, Basic) {
system::SocketFinalize();
}
} // namespace collective
} // namespace xgboost
} // namespace xgboost::collective