new mock test

This commit is contained in:
tqchen 2014-12-20 18:38:54 -08:00
parent 10bb407a2c
commit e40047f9c2
4 changed files with 8 additions and 5 deletions

View File

@ -25,7 +25,7 @@ test_model_recover: test_model_recover.o $(RABIT_OBJ)
test_local_recover: test_local_recover.o $(RABIT_OBJ)
$(BIN) :
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mock
$(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )

View File

@ -7,8 +7,8 @@ then
fi
nrep=0
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK
until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do
until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep; do
sleep 1
nrep=$((nrep+1))
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep
done

View File

@ -17,4 +17,5 @@ local_recover_10_10k:
# this experiment tests recovery with actual process exit; use keepalive to keep the program alive
model_recover_10_10k:
../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000
../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0

View File

@ -11,6 +11,7 @@ using namespace rabit;
namespace rabit {
namespace test {
inline void CallBegin(const char *fun, int ntrial, int iter) {
return;
int rank = rabit::GetRank();
if (!strcmp(fun, "Allreduce::Sum")) {
if (ntrial == iter && rank == 0) exit(-1);
@ -20,6 +21,7 @@ inline void CallBegin(const char *fun, int ntrial, int iter) {
}
}
inline void CallEnd(const char *fun, int ntrial, int iter) {
return;
int rank = rabit::GetRank();
if (!strcmp(fun, "Allreduce::Bcast")) {
if (ntrial == iter && rand() % 10 == rank) exit(-1);
@ -129,7 +131,7 @@ int main(int argc, char *argv[]) {
int ntrial = 0;
for (int i = 1; i < argc; ++i) {
int n;
if (sscanf(argv[i], "repeat=%d", &n) == 1) ntrial = n;
if (sscanf(argv[i], "rabit_num_trial=%d", &n) == 1) ntrial = n;
}
while (true) {
try {