Execute it like this: ./test.sh 4 4000 testcase0.conf ./

Now we are passing the folder where the round instances are saved.
The problem is that calling utils::Check or utils::Assert on one or two nodes shuts down all of them. Only those nodes should be shut down for this to work. There may be some other mechanism to shut down a particular node. Tianqi?
This commit is contained in:
nachocano 2014-11-28 01:48:26 -08:00
parent faed8285cd
commit a8128493c2
4 changed files with 45 additions and 21 deletions

View File

@ -7,7 +7,9 @@
*/
#include "./allreduce.h"
#include "./config.h"
#include <map>
#include <map>
#include <sstream>
#include <fstream>
/*! \brief namespace of mock */
@ -18,8 +20,8 @@ class Mock {
public:
Mock(const int& rank, char *config) : rank(rank) {
Init(config);
explicit Mock(const int& rank, char *config, char* round_dir) : rank(rank) {
Init(config, round_dir);
}
template<typename OP>
@ -46,20 +48,42 @@ public:
private:
inline void Init(char* config) {
inline void Init(char* config, char* round_dir) {
std::stringstream ss;
ss << round_dir << "node" << rank << ".round";
const char* round_file = ss.str().c_str();
std::ifstream ifs(round_file);
int current_round = 1;
if (!ifs.good()) {
// file does not exist yet: this is the first run, so save the current round (1)
std::ofstream ofs(round_file);
ofs << current_round;
ofs.close();
} else {
// file exists: read the previous round, increment it by one, and save it back
ifs >> current_round;
current_round++;
ifs.close();
std::ofstream ofs(round_file);
ofs << current_round;
ofs.close();
}
printf("[%d] in round %d\n", rank, current_round);
utils::ConfigIterator itr(config);
while (itr.Next()) {
char round[4], node_rank[4];
sscanf(itr.name(), "%[^_]_%s", round, node_rank);
int i_round = atoi(round);
if (i_round == 1) {
int i_node_rank = atoi(node_rank);
if (i_node_rank == rank) {
printf("[%d] round %d, value %s\n", rank, i_round, itr.val());
if (strcmp("allreduce", itr.val())) record(allReduce);
else if (strcmp("broadcast", itr.val())) record(broadcast);
else if (strcmp("loadcheckpoint", itr.val())) record(loadCheckpoint);
else if (strcmp("checkpoint", itr.val())) record(checkpoint);
int i_node_rank = atoi(node_rank);
// if it's something for me
if (i_node_rank == rank) {
int i_round = atoi(round);
// in my current round
if (i_round == current_round) {
printf("[%d] round %d, value %s\n", rank, i_round, itr.val());
if (strcmp("allreduce", itr.val())) record(allReduce);
else if (strcmp("broadcast", itr.val())) record(broadcast);
else if (strcmp("loadcheckpoint", itr.val())) record(loadCheckpoint);
else if (strcmp("checkpoint", itr.val())) record(checkpoint);
}
}
}
@ -82,6 +106,8 @@ private:
std::map<int,bool> broadcast;
std::map<int,bool> loadCheckpoint;
std::map<int,bool> checkpoint;
};
}

View File

@ -1,7 +1,8 @@
#!/bin/bash
if [ "$#" -ne 3 ];
if [ "$#" -ne 4 ];
then
echo "Usage <nslave> <ndata> <config>"
echo "Usage <nslave> <ndata> <config> <round_files_dir>"
exit -1
fi
../submit_job_tcp.py $1 test_allreduce $2 $3
../submit_job_tcp.py $1 test_allreduce $2 $3 $4

View File

@ -72,7 +72,7 @@ int main(int argc, char *argv[]) {
int rank = sync::GetRank();
std::string name = sync::GetProcessorName();
test::Mock mock(rank, argv[2]);
test::Mock mock(rank, argv[2], argv[3]);
printf("[%d] start at %s\n", rank, name.c_str());
TestMax(mock, n);

View File

@ -5,8 +5,5 @@
1_0 = allreduce
1_1 = broadcast
1_2 = loadcheckpoint
1_3 = checkpoint
2_0 = allreduce
2_2 = checkpoint
2_2 = allreduce