Work around macOS Java test race condition (#74)
* Fix error in dmlc#57; clean up comments and naming
* Include missing packages; disable recovery tests for now
* Disable local_recover tests until we have a bug fix
* Support larger clusters
* Fix lint; merge with master
* Fix macOS test failure in https://github.com/dmlc/xgboost/pull/3818
* Update allreduce_robust.cc
This commit is contained in:
parent
3a35dabfae
commit
eb2590b774
@ -50,6 +50,12 @@ void AllreduceRobust::Shutdown(void) {
|
||||
// execute check ack step, load happens here
|
||||
utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp),
|
||||
"Shutdown: check ack must return true");
|
||||
#ifdef __APPLE__
|
||||
// On macOS, one worker may shut down and close its sockets while the rest are still running kCheckAck.
|
||||
// This causes the remaining workers to enter check-and-recover and hang indefinitely; see https://github.com/dmlc/xgboost/pull/3818
|
||||
// TODO: a fundamental fix for this
|
||||
sleep(2);
|
||||
#endif
|
||||
AllreduceBase::Shutdown();
|
||||
}
|
||||
/*!
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user