[rabit] Small cleanup to tracker initialization. (#9524)

- Remove recover-related code.
- Startup is always clean, so there is no need to consider previously connected nodes.
This commit is contained in:
Jiaming Yuan
2023-08-27 05:10:59 +08:00
committed by GitHub
parent 209335b18c
commit 1b87a1d8f8
2 changed files with 3 additions and 20 deletions

View File

@@ -137,15 +137,9 @@ class WorkerEntry:
return self._get_remote(wait_conn, nnset)
def _get_remote(
self, wait_conn: Dict[int, "WorkerEntry"], nnset: Set[int]
self, wait_conn: Dict[int, "WorkerEntry"], badset: Set[int]
) -> List[int]:
while True:
ngood = self.sock.recvint()
goodset = set()
for _ in range(ngood):
goodset.add(self.sock.recvint())
assert goodset.issubset(nnset)
badset = nnset - goodset
conset = []
for r in badset:
if r in wait_conn:
@@ -343,7 +337,7 @@ class RabitTracker:
shutdown[s.rank] = s
logging.debug("Received %s signal from %d", s.cmd, s.rank)
continue
assert s.cmd in ("start", "recover")
assert s.cmd == "start"
# lazily initialize the workers
if tree_map is None:
assert s.cmd == "start"