Expose RabitAllGatherRing and RabitGetRingPrevRank (#113)
* add unittests * Expose RabitAllGatherRing and RabitGetRingPrevRank * Enabled TCP_NODELAY to decrease latency
This commit is contained in:
66
test/cpp/allreduce_base_test.cpp
Normal file
66
test/cpp/allreduce_base_test.cpp
Normal file
@@ -0,0 +1,66 @@
|
||||
#define RABIT_CXXTESTDEFS_H
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "../../src/allreduce_base.h"
|
||||
|
||||
// Checks that AllreduceBase::Init picks up the task id from a single
// "rabit_task_id=<value>" argument: after Init, base.task_id must equal "1".
TEST(allreduce_base, init_task)
{
  rabit::engine::AllreduceBase base;

  // argv needs mutable char* entries. Point into the std::string's own
  // null-terminated buffer instead of copying into a variable-length array,
  // which is a compiler extension and not standard C++.
  std::string rabit_task_id = "rabit_task_id=1";

  char* argv[] = {&rabit_task_id[0]};
  base.Init(1, argv);
  EXPECT_EQ(base.task_id, "1");
}
|
||||
|
||||
// Checks that AllreduceBase::Init handles three arguments at once: the task
// id, "rabit_bootstrap_cache=1" and "rabit_debug=1". After Init the
// corresponding members must read "1", 1 and 1 respectively.
TEST(allreduce_base, init_with_cache_on)
{
  rabit::engine::AllreduceBase base;

  // Each argument lives in a std::string whose null-terminated buffer is
  // handed to argv directly; this avoids variable-length arrays, which are a
  // compiler extension and not standard C++.
  std::string rabit_task_id = "rabit_task_id=1";
  std::string rabit_bootstrap_cache = "rabit_bootstrap_cache=1";
  std::string rabit_debug = "rabit_debug=1";

  char* argv[] = {&rabit_task_id[0], &rabit_bootstrap_cache[0], &rabit_debug[0]};
  base.Init(3, argv);
  EXPECT_EQ(base.task_id, "1");
  EXPECT_EQ(base.rabit_bootstrap_cache, 1);
  EXPECT_EQ(base.rabit_debug, 1);
}
|
||||
|
||||
// Checks that AllreduceBase::Init reads "rabit_reduce_ring_mincount=1"
// alongside the task id: after Init, base.reduce_ring_mincount must be 1.
TEST(allreduce_base, init_with_ring_reduce)
{
  rabit::engine::AllreduceBase base;

  // Arguments are kept in std::string objects and passed via pointers into
  // their null-terminated buffers, replacing the non-standard
  // variable-length arrays used previously.
  std::string rabit_task_id = "rabit_task_id=1";
  std::string rabit_reduce_ring_mincount = "rabit_reduce_ring_mincount=1";

  char* argv[] = {&rabit_task_id[0], &rabit_reduce_ring_mincount[0]};
  base.Init(2, argv);
  EXPECT_EQ(base.task_id, "1");
  EXPECT_EQ(base.reduce_ring_mincount, 1);
}
|
||||
51
test/cpp/allreduce_mock_test.cpp
Normal file
51
test/cpp/allreduce_mock_test.cpp
Normal file
@@ -0,0 +1,51 @@
|
||||
#define RABIT_CXXTESTDEFS_H
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "../../src/allreduce_mock.h"
|
||||
|
||||
// Death test: with the mock configured as "mock=0,0,0,0" and rank forced to
// 0, calling Allreduce is expected to terminate the process with exit
// code 255.
// NOTE(review): the four mock fields presumably select when the mock
// triggers a failure -- confirm against AllreduceMock.
TEST(allreduce_mock, mock_allreduce)
{
  rabit::engine::AllreduceMock m;

  // Pass a pointer into the string's own null-terminated buffer; a
  // variable-length char array is a compiler extension, not standard C++.
  std::string mock_str = "mock=0,0,0,0";

  char* argv[] = {&mock_str[0]};
  m.Init(1, argv);
  m.rank = 0;
  EXPECT_EXIT(m.Allreduce(nullptr,0,0,nullptr,nullptr,nullptr), ::testing::ExitedWithCode(255), "");
}
|
||||
|
||||
// Death test: with the mock configured as "mock=0,1,2,0" and the object's
// rank / version_number / seq_counter set to the matching 0 / 1 / 2,
// calling Broadcast is expected to terminate the process with exit code 255.
TEST(allreduce_mock, mock_broadcast)
{
  rabit::engine::AllreduceMock m;

  // Pass a pointer into the string's own null-terminated buffer; a
  // variable-length char array is a compiler extension, not standard C++.
  std::string mock_str = "mock=0,1,2,0";

  char* argv[] = {&mock_str[0]};
  m.Init(1, argv);
  m.rank = 0;
  m.version_number=1;
  m.seq_counter=2;
  EXPECT_EXIT(m.Broadcast(nullptr,0,0), ::testing::ExitedWithCode(255), "");
}
|
||||
|
||||
// Death test: with the mock configured as "mock=3,13,22,0" and the object's
// rank / version_number / seq_counter set to the matching 3 / 13 / 22,
// calling Allgather is expected to terminate the process with exit code 255.
TEST(allreduce_mock, mock_gather)
{
  rabit::engine::AllreduceMock m;

  // Pass a pointer into the string's own null-terminated buffer; a
  // variable-length char array is a compiler extension, not standard C++.
  std::string mock_str = "mock=3,13,22,0";

  char* argv[] = {&mock_str[0]};
  m.Init(1, argv);
  m.rank = 3;
  m.version_number=13;
  m.seq_counter=22;
  EXPECT_EXIT(m.Allgather(nullptr,0,0,0,0), ::testing::ExitedWithCode(255), "");
}
|
||||
@@ -26,7 +26,7 @@ class Model : public rabit::Serializable {
|
||||
}
|
||||
};
|
||||
|
||||
inline void TestMax(Model *model, int ntrial, int iter) {
|
||||
inline void TestMax(Model *model, int iter) {
|
||||
int rank = rabit::GetRank();
|
||||
int nproc = rabit::GetWorldSize();
|
||||
const int z = iter + 111;
|
||||
@@ -47,7 +47,7 @@ inline void TestMax(Model *model, int ntrial, int iter) {
|
||||
model->data = ndata;
|
||||
}
|
||||
|
||||
inline void TestSum(Model *model, int ntrial, int iter) {
|
||||
inline void TestSum(Model *model, int iter) {
|
||||
int rank = rabit::GetRank();
|
||||
int nproc = rabit::GetWorldSize();
|
||||
const int z = 131 + iter;
|
||||
@@ -69,7 +69,30 @@ inline void TestSum(Model *model, int ntrial, int iter) {
|
||||
model->data = ndata;
|
||||
}
|
||||
|
||||
inline void TestBcast(size_t n, int root, int ntrial, int iter) {
|
||||
// Allgather round-trip check.
// Builds a buffer holding one slice per process, fills only this rank's
// slice with values derived from the element index, the rank and `iter`,
// then calls Allgather on it. Afterwards every element of the buffer is
// recomputed locally and compared via utils::Check; finally the gathered
// buffer replaces model->data.
// NOTE(review): the verification reuses the local model->data for every
// rank's slice, so it presumably relies on all ranks holding identical
// model->data on entry -- confirm against the callers.
inline void TestAllgather(Model *model, int iter) {
  const int rank = rabit::GetRank();
  const int nproc = rabit::GetWorldSize();
  const int z = 131 + iter;
  // Length of one per-rank slice; model->data is not modified until the end.
  const size_t slice_len = model->data.size();

  std::vector<float> gathered(slice_len * nproc);
  const size_t slice_begin = rank * slice_len;
  for (size_t k = 0; k < slice_len; ++k) {
    gathered[slice_begin + k] = (k * (rank + 1)) % z + model->data[k];
  }
  Allgather(&gathered[0], gathered.size(), slice_begin,
            slice_len, slice_len);

  for (size_t pos = 0; pos < gathered.size(); ++pos) {
    // Which rank's slice this element falls in, and its index in the slice.
    const int owner = pos / slice_len;
    const int offset = pos % slice_len;
    const float expected = (offset * (owner + 1)) % z + model->data[offset];
    utils::Check(fabsf(expected - gathered[pos]) < 1e-5 ,
                 "[%d] TestAllgather check failure, local=%g, allgatherring=%g", rank, expected, gathered[pos]);
  }
  model->data = gathered;
}
|
||||
|
||||
inline void TestBcast(size_t n, int root) {
|
||||
int rank = rabit::GetRank();
|
||||
std::string s; s.resize(n);
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
@@ -113,19 +136,22 @@ int main(int argc, char *argv[]) {
|
||||
printf("[%d] reload-trail=%d, init iter=%d\n", rank, ntrial, iter);
|
||||
|
||||
for (int r = iter; r < 3; ++r) {
|
||||
TestMax(&model, ntrial, r);
|
||||
TestMax(&model, r);
|
||||
printf("[%d] !!!TestMax pass, iter=%d\n", rank, r);
|
||||
int step = std::max(nproc / 3, 1);
|
||||
for (int i = 0; i < nproc; i += step) {
|
||||
TestBcast(n, i, ntrial, r);
|
||||
TestBcast(n, i);
|
||||
}
|
||||
printf("[%d] !!!TestBcast pass, iter=%d\n", rank, r);
|
||||
|
||||
TestSum(&model, ntrial, r);
|
||||
TestSum(&model, r);
|
||||
printf("[%d] !!!TestSum pass, iter=%d\n", rank, r);
|
||||
TestAllgather(&model, r);
|
||||
printf("[%d] !!!TestAllgather pass, iter=%d\n", rank, r);
|
||||
rabit::CheckPoint(&model);
|
||||
printf("[%d] !!!Checkpoint pass, iter=%d\n", rank, r);
|
||||
}
|
||||
rabit::Finalize();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user