Initial support for federated learning (#7831)

Federated learning plugin for XGBoost:

* A gRPC server that aggregates MPI-style requests (allgather, allreduce, broadcast) from federated workers.
* A Rabit engine for the federated environment.
* An integration test that simulates federated learning.

Additional follow-ups are needed to address GPU support, better security, privacy, etc.
2022-05-05 06:49:22 -07:00
parent 46e0bce212
commit 14ef38b834
16 changed files with 1087 additions and 1 deletions
--- a/plugin/federated/federated.proto
+++ b/plugin/federated/federated.proto
@@ -0,0 +1,68 @@
+/*!
+ * Copyright 2022 XGBoost contributors
+ */
+syntax = "proto3";
+
+package xgboost.federated;
+
+service Federated {  // gRPC service for MPI-style requests from federated workers.
+  rpc Allgather(AllgatherRequest) returns (AllgatherReply) {}  // Unary RPC.
+  rpc Allreduce(AllreduceRequest) returns (AllreduceReply) {}  // Unary RPC.
+  rpc Broadcast(BroadcastRequest) returns (BroadcastReply) {}  // Unary RPC.
+}
+
+enum DataType {  // Element type tag used by AllreduceRequest.data_type.
+  CHAR = 0;  // Zero value: what proto3 readers see when the field is unset.
+  UCHAR = 1;
+  INT = 2;
+  UINT = 3;
+  LONG = 4;  // NOTE(review): bit width is not defined in this file; confirm.
+  ULONG = 5;
+  FLOAT = 6;
+  DOUBLE = 7;
+  LONGLONG = 8;
+  ULONGLONG = 9;
+}  // NOTE(review): names presumably mirror C/C++ scalar types; confirm.
+
+enum ReduceOperation {  // Selector used by AllreduceRequest.reduce_operation.
+  MAX = 0;  // Zero value: what proto3 readers see when the field is unset.
+  MIN = 1;
+  SUM = 2;
+}
+
+message AllgatherRequest {  // Request message of the Federated.Allgather RPC.
+  // An incrementing counter that is unique to each round of operations.
+  uint64 sequence_number = 1;
+  int32 rank = 2;  // Presumably the sending worker's rank; confirm with caller.
+  bytes send_buffer = 3;  // Opaque payload bytes supplied by the caller.
+}
+
+message AllgatherReply {  // Response message of the Federated.Allgather RPC.
+  bytes receive_buffer = 1;  // Opaque bytes returned to the calling worker.
+}
+
+message AllreduceRequest {  // Request message of the Federated.Allreduce RPC.
+  // An incrementing counter that is unique to each round of operations.
+  uint64 sequence_number = 1;
+  int32 rank = 2;  // Presumably the sending worker's rank; confirm with caller.
+  bytes send_buffer = 3;  // Opaque payload bytes supplied by the caller.
+  DataType data_type = 4;  // Presumably the element type of send_buffer; confirm.
+  ReduceOperation reduce_operation = 5;  // One of MAX, MIN or SUM.
+}
+
+message AllreduceReply {  // Response message of the Federated.Allreduce RPC.
+  bytes receive_buffer = 1;  // Opaque bytes returned to the calling worker.
+}
+
+message BroadcastRequest {  // Request message of the Federated.Broadcast RPC.
+  // An incrementing counter that is unique to each round of operations.
+  uint64 sequence_number = 1;
+  int32 rank = 2;  // Presumably the sending worker's rank; confirm with caller.
+  bytes send_buffer = 3;  // NOTE(review): presumably only used when rank == root.
+  // The root rank to broadcast from.
+  int32 root = 4;
+}
+
+message BroadcastReply {  // Response message of the Federated.Broadcast RPC.
+  bytes receive_buffer = 1;  // Opaque bytes returned to the calling worker.
+}