Work around macOS Java test race condition (#74)

* fix error in dmlc#57, clean up comments and naming

* include missing packages, disable recovery tests for now

* disable local_recover tests until we have a bug fix

* support larger cluster

* fix lint, merge with master

* fix mac osx test failure in https://github.com/dmlc/xgboost/pull/3818

* Update allreduce_robust.cc
This commit is contained in:
Chen Qin 2018-10-26 12:39:31 -07:00 committed by Nan Zhu
parent 3a35dabfae
commit eb2590b774

View File

@ -50,6 +50,12 @@ void AllreduceRobust::Shutdown(void) {
// execute check ack step, load happens here
utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp),
"Shutdown: check ack must return true");
#ifdef __APPLE__
// On OSX, one worker may shut down and close its sockets while the rest are still running kCheckAck.
// This causes the remaining workers to run checkandrecover and hang indefinitely; see https://github.com/dmlc/xgboost/pull/3818
// TODO: a fundamental fix for this
sleep(2);
#endif
AllreduceBase::Shutdown();
}
/*!