new mock test
This commit is contained in:
parent
10bb407a2c
commit
e40047f9c2
@ -25,7 +25,7 @@ test_model_recover: test_model_recover.o $(RABIT_OBJ)
|
|||||||
test_local_recover: test_local_recover.o $(RABIT_OBJ)
|
test_local_recover: test_local_recover.o $(RABIT_OBJ)
|
||||||
|
|
||||||
$(BIN) :
|
$(BIN) :
|
||||||
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit
|
$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc, $^) $(LDFLAGS) -lrabit_mock
|
||||||
|
|
||||||
$(OBJ) :
|
$(OBJ) :
|
||||||
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
|
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) )
|
||||||
|
|||||||
@ -7,8 +7,8 @@ then
|
|||||||
fi
|
fi
|
||||||
nrep=0
|
nrep=0
|
||||||
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK
|
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK
|
||||||
until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do
|
until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep; do
|
||||||
sleep 1
|
sleep 1
|
||||||
nrep=$((nrep+1))
|
nrep=$((nrep+1))
|
||||||
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep
|
echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK rabit_num_trial=$nrep
|
||||||
done
|
done
|
||||||
|
|||||||
@ -17,4 +17,5 @@ local_recover_10_10k:
|
|||||||
|
|
||||||
# this experiment test recovery with actually process exit, use keepalive to keep program alive
|
# this experiment test recovery with actually process exit, use keepalive to keep program alive
|
||||||
model_recover_10_10k:
|
model_recover_10_10k:
|
||||||
../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000
|
../tracker/rabit_mpi.py 10 local keepalive.sh test_model_recover 10000 mock=0,0,1,0 mock=1,1,1,0
|
||||||
|
|
||||||
|
|||||||
@ -11,6 +11,7 @@ using namespace rabit;
|
|||||||
namespace rabit {
|
namespace rabit {
|
||||||
namespace test {
|
namespace test {
|
||||||
inline void CallBegin(const char *fun, int ntrial, int iter) {
|
inline void CallBegin(const char *fun, int ntrial, int iter) {
|
||||||
|
return;
|
||||||
int rank = rabit::GetRank();
|
int rank = rabit::GetRank();
|
||||||
if (!strcmp(fun, "Allreduce::Sum")) {
|
if (!strcmp(fun, "Allreduce::Sum")) {
|
||||||
if (ntrial == iter && rank == 0) exit(-1);
|
if (ntrial == iter && rank == 0) exit(-1);
|
||||||
@ -20,6 +21,7 @@ inline void CallBegin(const char *fun, int ntrial, int iter) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline void CallEnd(const char *fun, int ntrial, int iter) {
|
inline void CallEnd(const char *fun, int ntrial, int iter) {
|
||||||
|
return;
|
||||||
int rank = rabit::GetRank();
|
int rank = rabit::GetRank();
|
||||||
if (!strcmp(fun, "Allreduce::Bcast")) {
|
if (!strcmp(fun, "Allreduce::Bcast")) {
|
||||||
if (ntrial == iter && rand() % 10 == rank) exit(-1);
|
if (ntrial == iter && rand() % 10 == rank) exit(-1);
|
||||||
@ -129,7 +131,7 @@ int main(int argc, char *argv[]) {
|
|||||||
int ntrial = 0;
|
int ntrial = 0;
|
||||||
for (int i = 1; i < argc; ++i) {
|
for (int i = 1; i < argc; ++i) {
|
||||||
int n;
|
int n;
|
||||||
if (sscanf(argv[i], "repeat=%d", &n) == 1) ntrial = n;
|
if (sscanf(argv[i], "rabit_num_trial=%d", &n) == 1) ntrial = n;
|
||||||
}
|
}
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user