From a8128493c29b57924e81b2de56c6fb952de7b1c2 Mon Sep 17 00:00:00 2001 From: nachocano Date: Fri, 28 Nov 2014 01:48:26 -0800 Subject: [PATCH] execute it like this: ./test.sh 4 4000 testcase0.conf ./ Now we are passing the folder where the round instances are saved. The problem is that calling utils::Check or utils::Assert on 1 or 2 nodes shuts down all of them. Only those should be shut down and this will work. There may be some other mechanism to shut down a particular node. Tianqi? --- src/mock.h | 52 ++++++++++++++++++++++++++++++----------- test/test.sh | 7 +++--- test/test_allreduce.cpp | 2 +- test/testcase1.conf | 5 +--- 4 files changed, 45 insertions(+), 21 deletions(-) diff --git a/src/mock.h b/src/mock.h index 8eb5629e6..d6afd49c6 100644 --- a/src/mock.h +++ b/src/mock.h @@ -7,7 +7,9 @@ */ #include "./allreduce.h" #include "./config.h" - #include +#include +#include +#include /*! \brief namespace of mock */ @@ -18,8 +20,8 @@ class Mock { public: - Mock(const int& rank, char *config) : rank(rank) { - Init(config); + explicit Mock(const int& rank, char *config, char* round_dir) : rank(rank) { + Init(config, round_dir); } template @@ -46,20 +48,42 @@ public: private: - inline void Init(char* config) { + inline void Init(char* config, char* round_dir) { + std::stringstream ss; + ss << round_dir << "node" << rank << ".round"; + const char* round_file = ss.str().c_str(); + std::ifstream ifs(round_file); + int current_round = 1; + if (!ifs.good()) { + // file does not exists, it's the first time, so save the current round to 1 + std::ofstream ofs(round_file); + ofs << current_round; + ofs.close(); + } else { + // file does exists, read the previous round, increment by one, and save it back + ifs >> current_round; + current_round++; + ifs.close(); + std::ofstream ofs(round_file); + ofs << current_round; + ofs.close(); + } + printf("[%d] in round %d\n", rank, current_round); utils::ConfigIterator itr(config); while (itr.Next()) { char round[4], node_rank[4]; 
sscanf(itr.name(), "%[^_]_%s", round, node_rank); - int i_round = atoi(round); - if (i_round == 1) { - int i_node_rank = atoi(node_rank); - if (i_node_rank == rank) { - printf("[%d] round %d, value %s\n", rank, i_round, itr.val()); - if (strcmp("allreduce", itr.val())) record(allReduce); - else if (strcmp("broadcast", itr.val())) record(broadcast); - else if (strcmp("loadcheckpoint", itr.val())) record(loadCheckpoint); - else if (strcmp("checkpoint", itr.val())) record(checkpoint); + int i_node_rank = atoi(node_rank); + // if it's something for me + if (i_node_rank == rank) { + int i_round = atoi(round); + // in my current round + if (i_round == current_round) { + printf("[%d] round %d, value %s\n", rank, i_round, itr.val()); + if (strcmp("allreduce", itr.val())) record(allReduce); + else if (strcmp("broadcast", itr.val())) record(broadcast); + else if (strcmp("loadcheckpoint", itr.val())) record(loadCheckpoint); + else if (strcmp("checkpoint", itr.val())) record(checkpoint); } } } @@ -82,6 +106,8 @@ private: std::map broadcast; std::map loadCheckpoint; std::map checkpoint; + + }; } diff --git a/test/test.sh b/test/test.sh index 753085724..5c70404ac 100755 --- a/test/test.sh +++ b/test/test.sh @@ -1,7 +1,8 @@ #!/bin/bash -if [ "$#" -ne 3 ]; +if [ "$#" -ne 4 ]; then - echo "Usage " + echo "Usage " exit -1 fi -../submit_job_tcp.py $1 test_allreduce $2 $3 + +../submit_job_tcp.py $1 test_allreduce $2 $3 $4 \ No newline at end of file diff --git a/test/test_allreduce.cpp b/test/test_allreduce.cpp index 807b1a1bb..9afdc6d03 100644 --- a/test/test_allreduce.cpp +++ b/test/test_allreduce.cpp @@ -72,7 +72,7 @@ int main(int argc, char *argv[]) { int rank = sync::GetRank(); std::string name = sync::GetProcessorName(); - test::Mock mock(rank, argv[2]); + test::Mock mock(rank, argv[2], argv[3]); printf("[%d] start at %s\n", rank, name.c_str()); TestMax(mock, n); diff --git a/test/testcase1.conf b/test/testcase1.conf index f5aa31892..cc9bd662c 100644 --- a/test/testcase1.conf 
+++ b/test/testcase1.conf @@ -5,8 +5,5 @@ 1_0 = allreduce 1_1 = broadcast -1_2 = loadcheckpoint -1_3 = checkpoint -2_0 = allreduce -2_2 = checkpoint +2_2 = allreduce