From 58331067f828661ceed3c924f39361f3b9aeb02a Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 18 Dec 2014 23:50:59 -0800 Subject: [PATCH] cleanup testcases --- src/allreduce_base.cc | 12 ++-- src/allreduce_robust.cc | 8 +-- submit_hadoop.py | 1 - submit_mpi.py | 3 +- test/Makefile | 19 +++--- test/README.md | 18 ++++++ {src => test}/config.h | 0 test/keepalive.sh | 6 +- {src => test}/mock.h | 3 + test/speed_runner.py | 2 +- test/speed_test.cpp | 4 +- test/test.mk | 20 ++++++ test/test_allreduce.cpp | 90 -------------------------- test/test_local_recover.cpp | 68 ++++++++++++-------- test/test_model_recover.cpp | 69 ++++++++++++-------- test/test_recover.cpp | 125 ------------------------------------ test/testcase0.conf | 1 - test/testcase1.conf | 9 --- 18 files changed, 152 insertions(+), 306 deletions(-) create mode 100644 test/README.md rename {src => test}/config.h (100%) rename {src => test}/mock.h (99%) create mode 100644 test/test.mk delete mode 100644 test/test_allreduce.cpp delete mode 100644 test/test_recover.cpp delete mode 100644 test/testcase0.conf delete mode 100644 test/testcase1.conf diff --git a/src/allreduce_base.cc b/src/allreduce_base.cc index 72fa12e79..99e12561c 100644 --- a/src/allreduce_base.cc +++ b/src/allreduce_base.cc @@ -26,7 +26,7 @@ AllreduceBase::AllreduceBase(void) { hadoop_mode = 0; version_number = 0; task_id = "NULL"; - this->SetParam("reduce_buffer", "256MB"); + this->SetParam("rabit_reduce_buffer", "256MB"); } // initialization function @@ -38,8 +38,8 @@ void AllreduceBase::Init(void) { utils::Check(task_id != NULL, "hadoop_mode is set but cannot find mapred_task_id"); } if (task_id != NULL) { - this->SetParam("task_id", task_id); - this->SetParam("hadoop_mode", "1"); + this->SetParam("rabit_task_id", task_id); + this->SetParam("rabit_hadoop_mode", "1"); } } // start socket @@ -83,9 +83,9 @@ void AllreduceBase::Shutdown(void) { void AllreduceBase::SetParam(const char *name, const char *val) { if (!strcmp(name, "rabit_tracker_uri")) tracker_uri = val; if (!strcmp(name, "rabit_tracker_port")) tracker_port = atoi(val); - if (!strcmp(name, "task_id")) task_id = val; - if (!strcmp(name, "hadoop_mode")) hadoop_mode = atoi(val); - if (!strcmp(name, "reduce_buffer")) { + if (!strcmp(name, "rabit_task_id")) task_id = val; + if (!strcmp(name, "rabit_hadoop_mode")) hadoop_mode = atoi(val); + if (!strcmp(name, "rabit_reduce_buffer")) { char unit; unsigned long amount; if (sscanf(val, "%lu%c", &amount, &unit) == 2) { diff --git a/src/allreduce_robust.cc b/src/allreduce_robust.cc index 88a6dcace..828f57e60 100644 --- a/src/allreduce_robust.cc +++ b/src/allreduce_robust.cc @@ -17,10 +17,10 @@ namespace rabit { namespace engine { AllreduceRobust::AllreduceRobust(void) { - result_buffer_round = 1; num_local_replica = 0; seq_counter = 0; local_chkpt_version = 0; + result_buffer_round = 1; } /*! \brief shutdown the engine */ void AllreduceRobust::Shutdown(void) { @@ -42,11 +42,11 @@ void AllreduceRobust::Shutdown(void) { */ void AllreduceRobust::SetParam(const char *name, const char *val) { AllreduceBase::SetParam(name, val); - if (!strcmp(name, "result_buffer_round")) result_buffer_round = atoi(val); - if (!strcmp(name, "result_replicate")) { + if (!strcmp(name, "rabit_buffer_round")) result_buffer_round = atoi(val); + if (!strcmp(name, "rabit_global_replica")) { result_buffer_round = std::max(world_size / atoi(val), 1); } - if (!strcmp(name, "num_local_replica")) { + if (!strcmp(name, "rabit_local_replica")) { num_local_replica = atoi(val); } } diff --git a/submit_hadoop.py b/submit_hadoop.py index ae315f60a..3852b9f1d 100755 --- a/submit_hadoop.py +++ b/submit_hadoop.py @@ -17,7 +17,6 @@ parser.add_argument('-hs', '--hadoop_streaming_jar', required=True) parser.add_argument('-i', '--input', required=True) parser.add_argument('-o', '--output', required=True) parser.add_argument('-m', '--mapper', required=True) -#parser.add_argument('-r', '--reducer', required=False) parser.add_argument('-k', '--nclusters', required=True, type=int) parser.add_argument('-itr', '--iterations', required=True, type=int) args = parser.parse_args() diff --git a/submit_mpi.py b/submit_mpi.py index 468604317..3b2b68c54 100755 --- a/submit_mpi.py +++ b/submit_mpi.py @@ -1,6 +1,6 @@ #!/usr/bin/python """ -This is an example script to create a customized job submit +This is an example script to create a customized job submit with mpi script using rabit engine """ import sys @@ -34,6 +34,7 @@ def mpi_submit(nslave, args): if __name__ == '__main__': if len(sys.argv) < 2: print 'Usage: ' + print 'if == local, we will run using local mode' exit(0) # call submit, with nslave, the commands to run each job and submit function tracker.submit(int(sys.argv[1]), sys.argv[2:], fun_submit= mpi_submit) diff --git a/test/Makefile b/test/Makefile index 9f742be74..2f3b81251 100644 --- a/test/Makefile +++ b/test/Makefile @@ -5,33 +5,30 @@ export LDFLAGS= -pthread -lm -lrt export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src # specify tensor path -BIN = test_allreduce test_recover test_model_recover speed_test test_local_recover +BIN = speed_test test_model_recover test_local_recover # objectives that makes up rabit library RABIT_OBJ = allreduce_base.o allreduce_robust.o engine.o MPIOBJ = engine_mpi.o -OBJ = $(RABIT_OBJ) test_allreduce.o test_recover.o test_model_recover.o speed_test.o test_local_recover.o -MPIBIN = test_allreduce.mpi speed_test.mpi +OBJ = $(RABIT_OBJ) speed_test.o test_model_recover.o test_local_recover.o +MPIBIN = speed_test.mpi .PHONY: clean all all: $(BIN) $(MPIBIN) - +# the rabit library allreduce_base.o: ../src/allreduce_base.cc ../src/*.h engine.o: ../src/engine.cc ../src/*.h -allreduce_robust.o: ../src/allreduce_robust.cc ../src/*.h engine_mpi.o: ../src/engine_mpi.cc -test_allreduce.o: test_allreduce.cpp ../src/*.h +allreduce_robust.o: ../src/allreduce_robust.cc ../src/*.h + +# programs speed_test.o: speed_test.cpp ../src/*.h -test_recover.o: test_recover.cpp ../src/*.h test_model_recover.o: test_model_recover.cpp ../src/*.h test_local_recover.o: test_local_recover.cpp ../src/*.h # we can link against MPI version to get use MPI -test_allreduce: test_allreduce.o $(RABIT_OBJ) -test_allreduce.mpi: test_allreduce.o $(MPIOBJ) speed_test: speed_test.o $(RABIT_OBJ) speed_test.mpi: speed_test.o $(MPIOBJ) -test_recover: test_recover.o $(RABIT_OBJ) test_model_recover: test_model_recover.o $(RABIT_OBJ) test_local_recover: test_local_recover.o $(RABIT_OBJ) @@ -48,4 +45,4 @@ $(MPIOBJ) : $(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c %.cc, $^) ) clean: - $(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~ + $(RM) $(OBJ) $(BIN) $(MPIBIN) $(MPIOBJ) *~ ../src/*~ diff --git a/test/README.md b/test/README.md new file mode 100644 index 000000000..fb68112bf --- /dev/null +++ b/test/README.md @@ -0,0 +1,18 @@ +Testcases of Rabit +==== +This folder contains internal testcases to test correctness and efficiency of rabit API + +The example running scripts for testcases are given by test.mk +* type ```make -f test.mk testcasename``` to run certain testcase + + +Helper Scripts +==== +* test.mk contains Makefile documentation of all testcases +* keepalive.sh helper bash to restart a program when it dies abnormally + +List of Programs +==== +* speed_test: test the running speed of rabit API +* test_local_recover: test recovery of local state when error happens +* test_model_recover: test recovery of global state when error happens diff --git a/src/config.h b/test/config.h similarity index 100% rename from src/config.h rename to test/config.h diff --git a/test/keepalive.sh b/test/keepalive.sh index ddfc5d618..854de0c33 100755 --- a/test/keepalive.sh +++ b/test/keepalive.sh @@ -6,9 +6,9 @@ then exit -1 fi nrep=0 -echo ./$@ task_id=$OMPI_COMM_WORLD_RANK -until ./$@ task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do +echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK +until ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep; do sleep 1 nrep=$((nrep+1)) - echo ./$@ job_id=$OMPI_COMM_WORLD_RANK repeat=$nrep + echo ./$@ rabit_task_id=$OMPI_COMM_WORLD_RANK repeat=$nrep done diff --git a/src/mock.h b/test/mock.h similarity index 99% rename from src/mock.h rename to test/mock.h index e5a4c283a..a5ac39c83 100644 --- a/src/mock.h +++ b/test/mock.h @@ -11,6 +11,9 @@ #include #include +struct MockException { +}; + namespace rabit { /*! \brief namespace of mock */ namespace test { diff --git a/test/speed_runner.py b/test/speed_runner.py index 7331c9075..1644bfe99 100644 --- a/test/speed_runner.py +++ b/test/speed_runner.py @@ -31,4 +31,4 @@ def main(): sys.stderr.flush() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/test/speed_test.cpp b/test/speed_test.cpp index c4410a562..8f7fc68bf 100644 --- a/test/speed_test.cpp +++ b/test/speed_test.cpp @@ -1,3 +1,4 @@ +// This program is used to test the speed of rabit API #include #include #include @@ -12,8 +13,6 @@ double max_tdiff, sum_tdiff, bcast_tdiff, tot_tdiff; inline void TestMax(size_t n) { int rank = rabit::GetRank(); - //int nproc = rabit::GetWorldSize(); - std::vector ndata(n); for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % 111; @@ -25,7 +24,6 @@ inline void TestMax(size_t n) { inline void TestSum(size_t n) { int rank = rabit::GetRank(); - //int nproc = rabit::GetWorldSize(); const int z = 131; std::vector ndata(n); for (size_t i = 0; i < ndata.size(); ++i) { diff --git a/test/test.mk b/test/test.mk new file mode 100644 index 000000000..a70fcf050 --- /dev/null +++ b/test/test.mk @@ -0,0 +1,20 @@ +ifndef $(nslave) + nslave=2 +endif +ifndef $(ndata) + ndata=10 +endif + +# this is a makefile used to show testcases of rabit +.PHONY: model_recover local_recover speed + + +local_recover: + ../submit_mpi.py $(nslave) local test_local_recover $(ndata) rabit_local_replica=1 + +local_recover_10_10k: + ../submit_mpi.py 10 local test_local_recover 10000 rabit_local_replica=1 + +# this experiment test recovery with actually process exit, use keepalive to keep program alive +model_recover_10_10k: + ../submit_mpi.py 10 local keepalive.sh test_model_recover 10000 diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp deleted file mode 100644 index 707b1a22a..000000000 --- a/test/test_allreduce.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include -#include -#include -#include -#include - -using namespace rabit; - -inline void TestMax(test::Mock &mock, size_t n) { - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % 111; - } - mock.Allreduce(&ndata[0], ndata.size()); - for (size_t i = 0; i < ndata.size(); ++i) { - float rmax = (i * 1) % 111; - for (int r = 0; r < nproc; ++r) { - rmax = std::max(rmax, (float)((i * (r+1)) % 111)); - } - utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank); - } -} - -inline void TestSum(test::Mock &mock, size_t n) { - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - const int z = 131; - - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % z; - } - mock.Allreduce(&ndata[0], ndata.size()); - for (size_t i = 0; i < ndata.size(); ++i) { - float rsum = 0.0f; - for (int r = 0; r < nproc; ++r) { - rsum += (float)((i * (r+1)) % z); - } - utils::Check(fabsf(rsum - ndata[i]) < 1e-5 , - "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]); - } -} - -inline void TestBcast(test::Mock &mock, size_t n, int root) { - int rank = rabit::GetRank(); - std::string s; s.resize(n); - for (size_t i = 0; i < n; ++i) { - s[i] = char(i % 126 + 1); - } - std::string res; - if (root == rank) { - res = s; - mock.Broadcast(&res, root); - } else { - mock.Broadcast(&res, root); - } - utils::Check(res == s, "[%d] TestBcast fail", rank); -} - -int main(int argc, char *argv[]) { - if (argc < 3) { - printf("Usage: \n"); - return 0; - } - int n = atoi(argv[1]); - rabit::Init(argc, argv); - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - std::string name = rabit::GetProcessorName(); - - test::Mock mock(rank, argv[2], argv[3]); - - utils::LogPrintf("[%d] start at %s\n", rank, name.c_str()); - TestMax(mock, n); - utils::LogPrintf("[%d] !!!TestMax pass\n", rank); - TestSum(mock, n); - utils::LogPrintf("[%d] !!!TestSum pass\n", rank); - int step = std::max(nproc / 3, 1); - for (int i = 0; i < nproc; i += step) { - TestBcast(mock, n, i); - } - utils::LogPrintf("[%d] !!!TestBcast pass\n", rank); - rabit::Finalize(); - printf("[%d] all check pass\n", rank); - return 0; -} diff --git a/test/test_local_recover.cpp b/test/test_local_recover.cpp index 106e04ef9..2d2c8234c 100644 --- a/test/test_local_recover.cpp +++ b/test/test_local_recover.cpp @@ -5,12 +5,28 @@ #include #include #include -#include +#include "./mock.h" using namespace rabit; - -struct MockException { -}; +namespace rabit { +namespace test { +inline void CallBegin(const char *fun, int ntrial, int iter) { + int rank = rabit::GetRank(); + if (!strcmp(fun, "Allreduce::Sum")) { + if (ntrial == iter && rank == 0) throw MockException(); + } + if (!strcmp(fun, "Allreduce::Max")) { + if (ntrial == iter && rank == 3) throw MockException(); + } +} +inline void CallEnd(const char *fun, int ntrial, int iter) { + int rank = rabit::GetRank(); + if (!strcmp(fun, "Allreduce::Bcast")) { + if (ntrial == iter && rand() % 10 == rank) throw MockException(); + } +} +} +} // dummy model class Model : public rabit::utils::ISerializable { @@ -31,7 +47,7 @@ class Model : public rabit::utils::ISerializable { } }; -inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, int iter) { +inline void TestMax(Model *model, Model *local, int ntrial, int iter) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); const int z = iter + 111; @@ -39,11 +55,11 @@ inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, in std::vector ndata(model->data.size()); for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + local->data[i]; - } - mock.Allreduce(&ndata[0], ndata.size()); - if (ntrial == iter && rank == 1) { - throw MockException(); - } + } + test::CallBegin("Allreduce::Max", ntrial, iter); + rabit::Allreduce(&ndata[0], ndata.size()); + test::CallEnd("Allreduce::Max", ntrial, iter); + for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; for (int r = 0; r < nproc; ++r) { @@ -58,7 +74,7 @@ inline void TestMax(test::Mock &mock, Model *model, Model *local, int ntrial, in } } -inline void TestSum(test::Mock &mock, Model *model, Model *local, int ntrial, int iter) { +inline void TestSum(Model *model, Model *local, int ntrial, int iter) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); const int z = 131 + iter; @@ -67,11 +83,10 @@ inline void TestSum(test::Mock &mock, Model *model, Model *local, int ntrial, in for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + local->data[i]; } - if (ntrial == iter && rank == 0) { - throw MockException(); - } - mock.Allreduce(&ndata[0], ndata.size()); - + test::CallBegin("Allreduce::Sum", ntrial, iter); + Allreduce(&ndata[0], ndata.size()); + test::CallEnd("Allreduce::Sum", ntrial, iter); + for (size_t i = 0; i < ndata.size(); ++i) { float rsum = 0.0f; for (int r = 0; r < nproc; ++r) { @@ -86,7 +101,7 @@ inline void TestSum(test::Mock &mock, Model *model, Model *local, int ntrial, in } } -inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { +inline void TestBcast(size_t n, int root, int ntrial, int iter) { int rank = rabit::GetRank(); std::string s; s.resize(n); for (size_t i = 0; i < n; ++i) { @@ -95,16 +110,20 @@ inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { std::string res; if (root == rank) { res = s; - mock.Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); + rabit::Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); } else { - mock.Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); + rabit::Broadcast(&res, root); + test::CallEnd("Broadcast", ntrial, iter); } utils::Check(res == s, "[%d] TestBcast fail", rank); } int main(int argc, char *argv[]) { if (argc < 3) { - printf("Usage: \n"); + printf("Usage: \n"); return 0; } int n = atoi(argv[1]); @@ -112,7 +131,6 @@ int main(int argc, char *argv[]) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); std::string name = rabit::GetProcessorName(); - test::Mock mock(rank, argv[2], argv[3]); Model model, local; srand(0); int ntrial = 0; @@ -131,14 +149,14 @@ int main(int argc, char *argv[]) { utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } for (int r = iter; r < 3; ++r) { - TestMax(mock, &model, &local, ntrial, r); + TestMax(&model, &local, ntrial, r); utils::LogPrintf("[%d] !!!TestMax pass, iter=%d\n", rank, r); int step = std::max(nproc / 3, 1); for (int i = 0; i < nproc; i += step) { - //TestBcast(mock, n, i, ntrial); + TestBcast(n, i, ntrial, r); } - //utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); - TestSum(mock, &model, &local, ntrial, r); + utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); + TestSum(&model, &local, ntrial, r); utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); rabit::CheckPoint(&model, &local); utils::LogPrintf("[%d] !!!CheckPont pass, iter=%d\n", rank, r); diff --git a/test/test_model_recover.cpp b/test/test_model_recover.cpp index 17432c06e..6feb56dde 100644 --- a/test/test_model_recover.cpp +++ b/test/test_model_recover.cpp @@ -5,12 +5,28 @@ #include #include #include -#include +#include "./mock.h" using namespace rabit; - -struct MockException { -}; +namespace rabit { +namespace test { +inline void CallBegin(const char *fun, int ntrial, int iter) { + int rank = rabit::GetRank(); + if (!strcmp(fun, "Allreduce::Sum")) { + if (ntrial == iter && rank == 0) exit(-1); + } + if (!strcmp(fun, "Allreduce::Max")) { + if (ntrial == iter && rank == 3) exit(-1); + } +} +inline void CallEnd(const char *fun, int ntrial, int iter) { + int rank = rabit::GetRank(); + if (!strcmp(fun, "Allreduce::Bcast")) { + if (ntrial == iter && rand() % 10 == rank) exit(-1); + } +} +} +} // dummy model class Model : public rabit::utils::ISerializable { @@ -31,7 +47,7 @@ class Model : public rabit::utils::ISerializable { } }; -inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { +inline void TestMax(Model *model, int ntrial, int iter) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); const int z = iter + 111; @@ -40,10 +56,10 @@ inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + model->data[i]; } - mock.Allreduce(&ndata[0], ndata.size()); - if (ntrial == 0 && rank == 3) { - exit(-1); - } + test::CallBegin("Allreduce::Max", ntrial, iter); + rabit::Allreduce(&ndata[0], ndata.size()); + test::CallEnd("Allreduce::Max", ntrial, iter); + for (size_t i = 0; i < ndata.size(); ++i) { float rmax = (i * 1) % z + model->data[i]; for (int r = 0; r < nproc; ++r) { @@ -54,7 +70,7 @@ inline void TestMax(test::Mock &mock, Model *model, int ntrial, int iter) { model->data = ndata; } -inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { +inline void TestSum(Model *model, int ntrial, int iter) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); const int z = 131 + iter; @@ -63,11 +79,9 @@ inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { for (size_t i = 0; i < ndata.size(); ++i) { ndata[i] = (i * (rank+1)) % z + model->data[i]; } - if (iter == 0 && ntrial==0 && rank == 0) { - throw MockException(); - } - - mock.Allreduce(&ndata[0], ndata.size()); + test::CallBegin("Allreduce::Sum", ntrial, iter); + Allreduce(&ndata[0], ndata.size()); + test::CallEnd("Allreduce::Sum", ntrial, iter); for (size_t i = 0; i < ndata.size(); ++i) { float rsum = model->data[i] * nproc; @@ -80,7 +94,7 @@ inline void TestSum(test::Mock &mock, Model *model, int ntrial, int iter) { model->data = ndata; } -inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { +inline void TestBcast(size_t n, int root, int ntrial, int iter) { int rank = rabit::GetRank(); std::string s; s.resize(n); for (size_t i = 0; i < n; ++i) { @@ -89,9 +103,13 @@ inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { std::string res; if (root == rank) { res = s; - mock.Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); + rabit::Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); } else { - mock.Broadcast(&res, root); + test::CallBegin("Broadcast", ntrial, iter); + rabit::Broadcast(&res, root); + test::CallEnd("Broadcast", ntrial, iter); } utils::Check(res == s, "[%d] TestBcast fail", rank); } @@ -106,7 +124,6 @@ int main(int argc, char *argv[]) { int rank = rabit::GetRank(); int nproc = rabit::GetWorldSize(); std::string name = rabit::GetProcessorName(); - test::Mock mock(rank, argv[2], argv[3]); Model model; srand(0); int ntrial = 0; @@ -124,14 +141,14 @@ int main(int argc, char *argv[]) { utils::LogPrintf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter); } for (int r = iter; r < 3; ++r) { - TestMax(mock, &model, ntrial, r); + TestMax(&model, ntrial, r); utils::LogPrintf("[%d] !!!TestMax pass, iter=%d\n", rank, r); - //int step = std::max(nproc / 3, 1); - //for (int i = 0; i < nproc; i += step) { - //TestBcast(mock, n, i, ntrial); - //} - //utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); - TestSum(mock, &model, ntrial, r); + int step = std::max(nproc / 3, 1); + for (int i = 0; i < nproc; i += step) { + TestBcast(n, i, ntrial, r); + } + utils::LogPrintf("[%d] !!!TestBcast pass, iter=%d\n", rank, r); + TestSum(&model, ntrial, r); utils::LogPrintf("[%d] !!!TestSum pass, iter=%d\n", rank, r); rabit::CheckPoint(&model); utils::LogPrintf("[%d] !!!CheckPont pass, iter=%d\n", rank, r); diff --git a/test/test_recover.cpp b/test/test_recover.cpp deleted file mode 100644 index 92aa60918..000000000 --- a/test/test_recover.cpp +++ /dev/null @@ -1,125 +0,0 @@ -#include -#include -#include -#include -#include -#include - -using namespace rabit; - -struct MockException { -}; - -inline void TestMax(test::Mock &mock, size_t n, int ntrial) { - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % 111; - } - mock.Allreduce(&ndata[0], ndata.size()); - if (ntrial == 0 && rank == 15) throw MockException(); - for (size_t i = 0; i < ndata.size(); ++i) { - float rmax = (i * 1) % 111; - for (int r = 0; r < nproc; ++r) { - rmax = std::max(rmax, (float)((i * (r+1)) % 111)); - } - utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank); - } -} - -inline void TestSum(test::Mock &mock, size_t n, int ntrial) { - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - const int z = 131; - - std::vector ndata(n); - for (size_t i = 0; i < ndata.size(); ++i) { - ndata[i] = (i * (rank+1)) % z; - } - mock.Allreduce(&ndata[0], ndata.size()); - - if (ntrial == 0 && rank == 0) throw MockException(); - - for (size_t i = 0; i < ndata.size(); ++i) { - float rsum = 0.0f; - for (int r = 0; r < nproc; ++r) { - rsum += (float)((i * (r+1)) % z); - } - utils::Check(fabsf(rsum - ndata[i]) < 1e-5 , - "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]); - } -} - -inline void TestBcast(test::Mock &mock, size_t n, int root, int ntrial) { - int rank = rabit::GetRank(); - std::string s; s.resize(n); - for (size_t i = 0; i < n; ++i) { - s[i] = char(i % 126 + 1); - } - std::string res; - if (root == rank) { - res = s; - mock.Broadcast(&res, root); - } else { - mock.Broadcast(&res, root); - } - utils::Check(res == s, "[%d] TestBcast fail", rank); -} -// dummy model -class Model : public rabit::utils::ISerializable { - public: - // load from stream - virtual void Load(rabit::utils::IStream &fi) { - // do nothing - } - /*! \brief save the model to the stream */ - virtual void Save(rabit::utils::IStream &fo) const { - // do nothing - } - virtual void InitModel(void) { - // do nothing - } -}; - -int main(int argc, char *argv[]) { - if (argc < 3) { - printf("Usage: \n"); - return 0; - } - int n = atoi(argv[1]); - rabit::Init(argc, argv); - int rank = rabit::GetRank(); - int nproc = rabit::GetWorldSize(); - std::string name = rabit::GetProcessorName(); - test::Mock mock(rank, argv[2], argv[3]); - Model model; - srand(0); - int ntrial = 0; - while (true) { - try { - if (rabit::LoadCheckPoint(&model) == 0) { - model.InitModel(); - } - utils::LogPrintf("[%d/%d] start at %s\n", rank, ntrial, name.c_str()); - TestMax(mock, n, ntrial); - utils::LogPrintf("[%d/%d] !!!TestMax pass\n", rank, ntrial); - TestSum(mock, n, ntrial); - utils::LogPrintf("[%d/%d] !!!TestSum pass\n", rank, ntrial); - int step = std::max(nproc / 3, 1); - for (int i = 0; i < nproc; i += step) { - TestBcast(mock, n, i, ntrial); - } - utils::LogPrintf("[%d] !!!TestBcast pass\n", rank); - // reach here - break; - } catch (MockException &e) { - rabit::engine::GetEngine()->InitAfterException(); - ++ntrial; - } - } - rabit::Finalize(); - printf("[%d] all check pass\n", rank); - return 0; -} diff --git a/test/testcase0.conf b/test/testcase0.conf deleted file mode 100644 index 4c324d282..000000000 --- a/test/testcase0.conf +++ /dev/null @@ -1 +0,0 @@ -# Test Case 0 -> nothing fails \ No newline at end of file diff --git a/test/testcase1.conf b/test/testcase1.conf deleted file mode 100644 index cc9bd662c..000000000 --- a/test/testcase1.conf +++ /dev/null @@ -1,9 +0,0 @@ -# Test Case example config -# You configure which methods should fail -# Format _ = -# can be one of the following = allreduce, broadcast, loadcheckpoint, checkpoint - -1_0 = allreduce -1_1 = broadcast - -2_2 = allreduce