add native script

This commit is contained in:
tqchen
2014-12-30 04:37:50 -08:00
parent 1bcea65117
commit bfb9aa3d77
5 changed files with 65 additions and 23 deletions

View File

@@ -1,14 +0,0 @@
#!/bin/bash
# Retry wrapper: runs ./<program> with the given parameters again and again
# until it exits with status 0.
#
# Arguments:
#   $1     program name, executed as ./$1 (relative to the current directory)
#   $2...  parameters forwarded unchanged to the program on every attempt
#
# Every attempt additionally receives:
#   rabit_task_id=$OMPI_COMM_WORLD_RANK  (taken from the environment as-is)
#   rabit_num_trial=<n>                  (0 on the first attempt, +1 per retry)
#
# Exit status: 255 when called without arguments; otherwise the script only
# finishes once the program has succeeded.
if [ "$#" -lt 1 ]; then
    echo "Usage: program parameters"
    echo "Repeatively run program until success"
    # 255 is the status bash reports for `exit -1`; writing it explicitly keeps
    # the status callers see while staying inside the valid 0-255 range.
    exit 255
fi

nrep=0
# "./$@" glues ./ onto the first argument only and keeps every argument as a
# single word, so parameters containing spaces or glob characters are passed
# through intact instead of being re-split by the shell.
echo "./$@" "rabit_task_id=$OMPI_COMM_WORLD_RANK"
until "./$@" "rabit_task_id=$OMPI_COMM_WORLD_RANK" "rabit_num_trial=$nrep"; do
    # Short pause before the next attempt.
    sleep 1
    nrep=$((nrep+1))
    echo "./$@" "rabit_task_id=$OMPI_COMM_WORLD_RANK" "rabit_num_trial=$nrep"
done

View File

@@ -10,17 +10,17 @@ endif
# Local-recovery test: starts test_local_recover through the tracker script with
# -n $(nslave), $(ndata) and rabit_local_replica=1.
# NOTE(review): each target here lists a rabit_mpi.py line followed by an otherwise
# identical rabit_demo.py line; this looks like the removed/added pair of a diff
# hunk rather than two commands meant to run back to back — confirm which remains.
local_recover:
../tracker/rabit_mpi.py -n $(nslave) test_local_recover $(ndata) rabit_local_replica=1
../tracker/rabit_demo.py -n $(nslave) test_local_recover $(ndata) rabit_local_replica=1
# Same test with the two values fixed at -n 10 and 10000.
local_recover_10_10k:
../tracker/rabit_mpi.py -n 10 test_local_recover 10000 rabit_local_replica=1
../tracker/rabit_demo.py -n 10 test_local_recover 10000 rabit_local_replica=1
# This experiment tests recovery with an actual process exit; keepalive is used
# to keep the program alive.
# NOTE(review): in each target below the rabit_mpi.py line wraps the test in
# keepalive.sh while the rabit_demo.py line does not; the two lines look like the
# removed/added pair of a diff hunk — confirm which one is meant to remain.
# The meaning of the mock=a,b,c,d tuples is not visible here; presumably they
# describe the simulated failures — verify against test_model_recover.
model_recover_10_10k:
../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0
../tracker/rabit_demo.py -n 10 test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0
# Same run with three more mock= entries appended (0,1,1,0  4,1,1,0  9,1,1,0).
model_recover_10_10k_die_same:
../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0
../tracker/rabit_demo.py -n 10 test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0
# Longest variant: the die_same list plus mock=1,1,1,1, mock=8,1,2,0 and mock=4,1,3,0.
model_recover_10_10k_die_hard:
../tracker/rabit_mpi.py -n 10 keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0
../tracker/rabit_demo.py -n 10 test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0 mock=1,1,1,1 mock=0,1,1,0 mock=4,1,1,0 mock=9,1,1,0 mock=8,1,2,0 mock=4,1,3,0