new mock test
This commit is contained in:
parent
10bb407a2c
commit
e40047f9c2
@ -25,7 +25,7 @@ test_model_recover: test_model_recover.o $(RABIT_OBJ)
|
||||
test_local_recover: test_local_recover.o $(RABIT_OBJ)
|
||||
|
||||
$(BIN) :
|
||||
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit
|
||||
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mock
|
||||
|
||||
$(OBJ) :
|
||||
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
|
||||
|
||||
@ -7,8 +7,8 @@ then
|
||||
fi
|
||||
nrep=0
|
||||
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK
|
||||
until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do
|
||||
until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep; do
|
||||
sleep 1
|
||||
nrep=$((nrep+1))
|
||||
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep
|
||||
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep
|
||||
done
|
||||
|
||||
@ -17,4 +17,5 @@ local_recover_10_10k:
|
||||
|
||||
# this experiment tests recovery with an actual process exit; keepalive is used to keep the program alive
|
||||
model_recover_10_10k:
|
||||
../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000
|
||||
../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0
|
||||
|
||||
|
||||
@ -11,6 +11,7 @@ using namespace rabit;
|
||||
namespace rabit {
|
||||
namespace test {
|
||||
inline void CallBegin(const char *fun, int ntrial, int iter) {
|
||||
return;
|
||||
int rank = rabit::GetRank();
|
||||
if (!strcmp(fun, "Allreduce::Sum")) {
|
||||
if (ntrial == iter && rank == 0) exit(-1);
|
||||
@ -20,6 +21,7 @@ inline void CallBegin(const char *fun, int ntrial, int iter) {
|
||||
}
|
||||
}
|
||||
inline void CallEnd(const char *fun, int ntrial, int iter) {
|
||||
return;
|
||||
int rank = rabit::GetRank();
|
||||
if (!strcmp(fun, "Allreduce::Bcast")) {
|
||||
if (ntrial == iter && rand() % 10 == rank) exit(-1);
|
||||
@ -129,7 +131,7 @@ int main(int argc, char *argv[]) {
|
||||
int ntrial = 0;
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
int n;
|
||||
if (sscanf(argv[i], "repeat=%d", &n) == 1) ntrial = n;
|
||||
if (sscanf(argv[i], "rabit_num_trial=%d", &n) == 1) ntrial = n;
|
||||
}
|
||||
while (true) {
|
||||
try {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user