Work around macOS Java test race condition (#74)
* fix error in dmlc#57, clean up comments and naming * include missing packages, disable recovery tests for now * disable local_recover tests until we have a bug fix * support larger cluster * fix lint, merge with master * fix mac osx test failure in https://github.com/dmlc/xgboost/pull/3818 * Update allreduce_robust.cc
This commit is contained in:
parent
3a35dabfae
commit
eb2590b774
@ -50,6 +50,12 @@ void AllreduceRobust::Shutdown(void) {
|
|||||||
// execute check ack step, load happens here
|
// execute check ack step, load happens here
|
||||||
utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp),
|
utils::Assert(RecoverExec(NULL, 0, ActionSummary::kCheckAck, ActionSummary::kSpecialOp),
|
||||||
"Shutdown: check ack must return true");
|
"Shutdown: check ack must return true");
|
||||||
|
#ifdef __APPLE__
|
||||||
|
// On OSX, one worker shuts down and closes its sockets while the rest still run kCheckAck
|
||||||
|
// This causes the rest of the workers to checkandrecover and hang forever, see https://github.com/dmlc/xgboost/pull/3818
|
||||||
|
// TODO: a fundamental fix for this
|
||||||
|
sleep(2);
|
||||||
|
#endif
|
||||||
AllreduceBase::Shutdown();
|
AllreduceBase::Shutdown();
|
||||||
}
|
}
|
||||||
/*!
|
/*!
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user