initial version of allreduce

This commit is contained in:
tqchen
2014-11-25 16:15:56 -08:00
parent 5e5bdda491
commit d37f38c455
13 changed files with 1683 additions and 0 deletions

33
test/Makefile Normal file
View File

@@ -0,0 +1,33 @@
# Build file for the allreduce test program.
#
# Usage:
#   make            build everything listed in BIN (and MPIBIN, if set)
#   make no_omp=1   build with -DDISABLE_OPENMP instead of -fopenmp
#   make clean      remove build outputs and editor backup files

# Toolchain and flags; exported so they are visible to commands run by make.
export CC = gcc
export CXX = g++
export MPICXX = mpicxx
export LDFLAGS= -pthread -lm
# -I../src puts the library sources on the include search path.
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -I../src

# OpenMP is enabled unless the caller passes no_omp=1.
ifeq ($(no_omp),1)
	CFLAGS += -DDISABLE_OPENMP
else
	CFLAGS += -fopenmp
endif

# Executables (BIN) and intermediate object files (OBJ) produced here.
# NOTE(review): MPIBIN is referenced below but never assigned in this file;
# it appears to be empty unless supplied from the environment or command line.
BIN = test_allreduce
OBJ = engine_tcp.o
.PHONY: clean all

all: $(BIN) $(MPIBIN)

# Dependency lists only; the recipes come from the generic rules below.
engine_tcp.o: ../src/engine_tcp.cpp ../src/*.h
test_allreduce: test_allreduce.cpp ../src/*.h engine_tcp.o

# Link an executable from every .cpp/.o/.c prerequisite (headers are filtered out).
$(BIN) :
	$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)

# Compile an object file from its first .cpp/.c prerequisite.
$(OBJ) :
	$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )

# Same as the $(BIN) link rule, but driven through the MPI compiler wrapper.
$(MPIBIN) :
	$(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)

# Remove object files as well as binaries so a rebuild never reuses a stale .o.
clean:
	$(RM) $(OBJ) $(BIN) $(MPIBIN) *~ ../src/*~

7
test/test.sh Executable file
View File

@@ -0,0 +1,7 @@
#!/bin/bash
# Run the test_allreduce binary through ../submit_job_tcp.py.
#
# Usage: test.sh <nslave> <ndata>
#   <nslave>  forwarded as the first argument of ../submit_job_tcp.py
#   <ndata>   forwarded as the argument of test_allreduce
#
# Exits with a non-zero status when the argument count is wrong; otherwise the
# exit status is that of ../submit_job_tcp.py.
if [ "$#" -ne 2 ]; then
    # Diagnostics go to stderr so they do not mix with the test output.
    echo "Usage <nslave> <ndata>" >&2
    # bash reports this as status 255.
    exit -1
fi
# Quote the arguments so they are passed through verbatim (no word splitting
# or glob expansion).
../submit_job_tcp.py "$1" test_allreduce "$2"

80
test/test_allreduce.cpp Normal file
View File

@@ -0,0 +1,80 @@
#include <allreduce.h>
#include <utils.h>
#include <cstdio>
#include <cstdlib>
#include <cmath>
using namespace sync;
// Checks sync::AllReduce with op::Max on a buffer of n floats.
//
// Each process fills element i with (i * (rank + 1)) % 111, runs the
// allreduce, and then compares every element against the maximum of that
// same formula evaluated locally for every rank in [0, world size).
// A mismatch is reported through utils::Check.
//
// \param n number of float elements to reduce
inline void TestMax(size_t n) {
  const size_t kMod = 111;
  const int rank = sync::GetRank();
  const int nproc = sync::GetWorldSize();
  std::vector<float> ndata(n);
  for (size_t i = 0; i < ndata.size(); ++i) {
    ndata[i] = static_cast<float>((i * (rank + 1)) % kMod);
  }
  // Indexing element 0 of an empty vector is undefined behaviour, so hand
  // over a null buffer when there is nothing to reduce. The call is still
  // made so that every process takes part in the collective.
  float *buf = ndata.empty() ? NULL : &ndata[0];
  sync::AllReduce<op::Max>(buf, ndata.size());
  for (size_t i = 0; i < ndata.size(); ++i) {
    // All values are small integers, exactly representable as float, so the
    // equality comparison below is exact.
    float rmax = static_cast<float>((i * 1) % kMod);
    for (int r = 0; r < nproc; ++r) {
      rmax = std::max(rmax, static_cast<float>((i * (r + 1)) % kMod));
    }
    utils::Check(rmax == ndata[i], "[%d] TestMax check failure", rank);
  }
}
// Checks sync::AllReduce with op::Sum on a buffer of n floats.
//
// Each process fills element i with (i * (rank + 1)) % 131, runs the
// allreduce, and then compares every element against the sum of that same
// formula evaluated locally for every rank in [0, world size).
// A difference of 1e-5 or more is reported through utils::Check.
//
// \param n number of float elements to reduce
inline void TestSum(size_t n) {
  const int rank = sync::GetRank();
  const int nproc = sync::GetWorldSize();
  const int z = 131;
  std::vector<float> ndata(n);
  for (size_t i = 0; i < ndata.size(); ++i) {
    ndata[i] = static_cast<float>((i * (rank + 1)) % z);
  }
  // Indexing element 0 of an empty vector is undefined behaviour, so hand
  // over a null buffer when there is nothing to reduce. The call is still
  // made so that every process takes part in the collective.
  float *buf = ndata.empty() ? NULL : &ndata[0];
  sync::AllReduce<op::Sum>(buf, ndata.size());
  for (size_t i = 0; i < ndata.size(); ++i) {
    float rsum = 0.0f;
    for (int r = 0; r < nproc; ++r) {
      rsum += static_cast<float>((i * (r + 1)) % z);
    }
    // std::fabs has a float overload declared by <cmath>; the unqualified
    // C name fabsf is not guaranteed to be visible in the global namespace.
    utils::Check(std::fabs(rsum - ndata[i]) < 1e-5,
                 "[%d] TestSum check failure, local=%g, allreduce=%g", rank, rsum, ndata[i]);
  }
}
// Checks sync::Bcast on a std::string of n characters.
//
// Every process builds the same reference string (character i is
// i % 126 + 1). Only the process whose rank equals root copies it into the
// string handed to Bcast; afterwards every process compares the string it
// holds against the reference and reports a mismatch through utils::Check.
//
// \param n    length of the string to broadcast
// \param root rank passed to sync::Bcast as the root
inline void TestBcast(size_t n, int root) {
  const int rank = sync::GetRank();

  // reference payload, identical on every process
  std::string expected(n, '\0');
  for (size_t pos = 0; pos < n; ++pos) {
    expected[pos] = char(pos % 126 + 1);
  }

  // only the root starts with the payload; all other ranks start empty
  std::string received;
  if (rank == root) {
    received = expected;
  }
  sync::Bcast(&received, root);

  utils::Check(received == expected, "[%d] TestBcast fail", rank);
}
// Entry point of the allreduce test.
//
// Expects the element count <ndata> as the first command line argument,
// initialises the sync engine, runs TestMax and TestSum with that count and
// finalises the engine. Progress lines are printed to stdout prefixed with
// the rank of the process.
//
// Returns 0 when all checks pass or when no argument is given (usage is
// printed), and 1 when <ndata> is not a non-negative integer.
//
// NOTE(review): TestBcast is defined in this file but is not invoked here.
int main(int argc, char *argv[]) {
  if (argc < 2) {
    printf("Usage: <ndata>\n");
    return 0;
  }
  // Parse with strtol rather than atoi: atoi cannot signal malformed input,
  // and a negative count would wrap to a huge size_t inside the tests.
  char *end = NULL;
  const long parsed = strtol(argv[1], &end, 10);
  if (end == argv[1] || *end != '\0' || parsed < 0) {
    fprintf(stderr, "invalid <ndata> \"%s\": expected a non-negative integer\n", argv[1]);
    return 1;
  }
  const size_t n = static_cast<size_t>(parsed);

  sync::Init(argc, argv);
  const int rank = sync::GetRank();
  const std::string name = sync::GetProcessorName();
  printf("[%d] start at %s\n", rank, name.c_str());
  TestMax(n);
  printf("[%d] TestMax pass\n", rank);
  TestSum(n);
  printf("[%d] TestSum pass\n", rank);
  sync::Finalize();
  printf("[%d] all check pass\n", rank);
  return 0;
}