support larger cluster (#73)

* fix error in dmlc#57, clean up comments and naming

* include missing packages, disable recovery tests for now

* disable local_recover tests until we have a bug fix

* support larger cluster

* fix lint, merge with master
This commit is contained in:
Chen Qin
2018-10-22 10:13:45 -07:00
committed by Nan Zhu
parent 69cdfae22f
commit 3a35dabfae
5 changed files with 121 additions and 123 deletions

View File

@@ -4,6 +4,7 @@
all: model_recover_10_10k model_recover_10_10k_die_same model_recover_10_10k_die_hard local_recover_10_10k
# these experiments test recovery with an actual process exit; use keepalive to keep the program alive
# TODO: enable those tests once we fix the issue in rabit
model_recover_10_10k:
../dmlc-core/tracker/dmlc-submit --cluster local --num-workers=10 model_recover 10000 mock=0,0,1,0 mock=1,1,1,0