add native script
This commit is contained in:
@@ -1,14 +0,0 @@
|
||||
#!/bin/bash
|
||||
if [ "$#" -lt 1 ];
|
||||
then
|
||||
echo "Usage: program parameters"
|
||||
echo "Repeatively run program until success"
|
||||
exit -1
|
||||
fi
|
||||
nrep=0
|
||||
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK
|
||||
until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep; do
|
||||
sleep 1
|
||||
nrep=$((nrep+1))
|
||||
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep
|
||||
done
|
||||
10
test/test.mk
10
test/test.mk
@@ -10,17 +10,17 @@ endif
|
||||
|
||||
|
||||
local_recover:
|
||||
../tracker/rabit_mpi.py -n $(nslave) test_local_recover $(ndata) rabit_local_replica=1
|
||||
../tracker/rabit_demo.py -n $(nslave) test_local_recover $(ndata) rabit_local_replica=1
|
||||
|
||||
local_recover_10_10k:
|
||||
../tracker/rabit_mpi.py -n 10 test_local_recover 10000 rabit_local_replica=1
|
||||
../tracker/rabit_demo.py -n 10 test_local_recover 10000 rabit_local_replica=1
|
||||
|
||||
# this experiment tests recovery with an actual process exit, using keepalive to keep the program alive
|
||||
model_recover_10_10k:
|
||||
../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0
|
||||
../tracker/rabit_demo.py -n 10 test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0
|
||||
|
||||
model_recover_10_10k_die_same:
|
||||
../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0
|
||||
../tracker/rabit_demo.py -n 10 test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0
|
||||
|
||||
model_recover_10_10k_die_hard:
|
||||
../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0
|
||||
../tracker/rabit_demo.py -n 10 test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0
|
||||
|
||||
Reference in New Issue
Block a user