[jvm-packages] fix potential unit test suites aborted issue (#6373)
* fix race condition * code cleaning rm pom.xml-e * clean again * fix compilation issue * recover * avoid using getOrCreate * interrupt zombie threads * safe guard * fix deadlock * Update SparkParallelismTracker.scala
This commit is contained in:
@@ -45,12 +45,26 @@ trait PerTest extends BeforeAndAfterEach { self: FunSuite =>
|
||||
/** Invoked ahead of every test case; delegates to `getOrCreateSession` and discards its result. */
override def beforeEach(): Unit = {
  getOrCreateSession
}
|
||||
|
||||
override def afterEach() {
|
||||
synchronized {
|
||||
TaskFailedListener.sparkContextShutdownLock.synchronized {
|
||||
if (currentSession != null) {
|
||||
// this synchronization is mostly for the tests involving SparkContext shutdown
|
||||
// for unit tests involving SparkContext shutdown there are two possible event sequences:
|
||||
// 1. SparkContext killer is executed before afterEach, in this case, before SparkContext
|
||||
// is fully stopped, afterEach() will block at the following code block
|
||||
// 2. SparkContext killer is executed after afterEach; in this case, currentSession.stop() will
|
||||
// block to wait for all msgs in ListenerBus get processed. Because currentSession.stop()
|
||||
// has been called, SparkContext killer will not take effect
|
||||
while (TaskFailedListener.killerStarted) {
|
||||
TaskFailedListener.sparkContextShutdownLock.wait()
|
||||
}
|
||||
currentSession.stop()
|
||||
cleanExternalCache(currentSession.sparkContext.appName)
|
||||
currentSession = null
|
||||
}
|
||||
if (TaskFailedListener.sparkContextKiller != null) {
|
||||
TaskFailedListener.sparkContextKiller.interrupt()
|
||||
TaskFailedListener.sparkContextKiller = null
|
||||
}
|
||||
TaskFailedListener.killerStarted = false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -114,6 +114,7 @@ class XGBoostRabitRegressionSuite extends FunSuite with PerTest {
|
||||
// assume all tasks throw exception almost same time
|
||||
// 100ms should be enough to exhaust all retries
|
||||
assert(waitAndCheckSparkShutdown(100) == true)
|
||||
TaskFailedListener.killerStarted = false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user