Make federated client more robust (#8351)

This commit is contained in:
Rong Ou 2022-10-17 22:52:44 -07:00 committed by GitHub
parent 5647fc6542
commit 521086d56b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -28,8 +28,11 @@ class FederatedClient {
options.pem_cert_chain = client_cert; options.pem_cert_chain = client_cert;
grpc::ChannelArguments args; grpc::ChannelArguments args;
args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max()); args.SetMaxReceiveMessageSize(std::numeric_limits<int>::max());
return Federated::NewStub( auto channel =
grpc::CreateCustomChannel(server_address, grpc::SslCredentials(options), args)); grpc::CreateCustomChannel(server_address, grpc::SslCredentials(options), args);
channel->WaitForConnected(
gpr_time_add(gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_seconds(60, GPR_TIMESPAN)));
return Federated::NewStub(channel);
}()}, }()},
rank_{rank} {} rank_{rank} {}
@@ -51,6 +54,7 @@ class FederatedClient {
AllgatherReply reply; AllgatherReply reply;
grpc::ClientContext context; grpc::ClientContext context;
context.set_wait_for_ready(true);
grpc::Status status = stub_->Allgather(&context, request, &reply); grpc::Status status = stub_->Allgather(&context, request, &reply);
if (status.ok()) { if (status.ok()) {
@@ -72,6 +76,7 @@ class FederatedClient {
AllreduceReply reply; AllreduceReply reply;
grpc::ClientContext context; grpc::ClientContext context;
context.set_wait_for_ready(true);
grpc::Status status = stub_->Allreduce(&context, request, &reply); grpc::Status status = stub_->Allreduce(&context, request, &reply);
if (status.ok()) { if (status.ok()) {
@@ -91,6 +96,7 @@ class FederatedClient {
BroadcastReply reply; BroadcastReply reply;
grpc::ClientContext context; grpc::ClientContext context;
context.set_wait_for_ready(true);
grpc::Status status = stub_->Broadcast(&context, request, &reply); grpc::Status status = stub_->Broadcast(&context, request, &reply);
if (status.ok()) { if (status.ok()) {