Merge rabit

This commit is contained in:
fis
2020-08-18 03:52:33 +08:00
81 changed files with 11230 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
# Build and register the Rabit C++ unit tests (requires an installed GoogleTest).
find_package(GTest REQUIRED)

# Single test binary assembled from every gtest source listed here.
add_executable(
  unit_tests
  test_io.cc
  test_utils.cc
  allreduce_robust_test.cc
  allreduce_base_test.cc
  allreduce_mock_test.cc
  test_main.cpp)

# NOTE(review): test_main.cpp is listed above and GTest::Main is linked too;
# if test_main.cpp defines main(), GTest::Main is redundant -- confirm.
target_link_libraries(
  unit_tests PRIVATE
  GTest::GTest GTest::Main
  rabit_base rabit_mock rabit)

# An executable has no downstream consumers, so its include paths are PRIVATE.
target_include_directories(unit_tests PRIVATE
  "$<BUILD_INTERFACE:${rabit_SOURCE_DIR}/include>"
  "$<BUILD_INTERFACE:${DMLC_ROOT}/include>")

# Emit the binary directly into the rabit build directory for every
# configuration so that the add_test() working directory below matches.
set_target_properties(unit_tests
  PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED ON
  RUNTIME_OUTPUT_DIRECTORY ${rabit_BINARY_DIR}
  RUNTIME_OUTPUT_DIRECTORY_DEBUG ${rabit_BINARY_DIR}
  RUNTIME_OUTPUT_DIRECTORY_RELEASE ${rabit_BINARY_DIR})

# Expose the binary to CTest under the name TestRabitLib.
add_test(
  NAME TestRabitLib
  COMMAND unit_tests
  WORKING_DIRECTORY ${rabit_BINARY_DIR})

1
rabit/test/cpp/README.md Normal file
View File

@@ -0,0 +1 @@
Unit tests for Rabit

View File

@@ -0,0 +1,66 @@
#define RABIT_CXXTESTDEFS_H
#include <gtest/gtest.h>
#include <string>
#include <iostream>
#include "../../src/allreduce_base.h"
// Checks that AllreduceBase::Init reads `rabit_task_id` from argv into task_id.
TEST(allreduce_base, init_task)
{
  rabit::engine::AllreduceBase base;

  // argv is built as char*[], so each argument needs writable storage.
  // A std::string buffer (contiguous and null-terminated since C++11)
  // replaces the previous variable-length array, which is a non-standard
  // compiler extension in C++.
  std::string rabit_task_id = "rabit_task_id=1";
  char* argv[] = {&rabit_task_id[0]};

  base.Init(1, argv);
  EXPECT_EQ(base.task_id, "1");
}
// Checks that Init reads the task id together with the
// `rabit_bootstrap_cache` and `rabit_debug` flags from argv.
TEST(allreduce_base, init_with_cache_on)
{
  rabit::engine::AllreduceBase base;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_task_id = "rabit_task_id=1";
  std::string rabit_bootstrap_cache = "rabit_bootstrap_cache=1";
  std::string rabit_debug = "rabit_debug=1";
  char* argv[] = {&rabit_task_id[0], &rabit_bootstrap_cache[0], &rabit_debug[0]};

  base.Init(3, argv);
  EXPECT_EQ(base.task_id, "1");
  EXPECT_EQ(base.rabit_bootstrap_cache, 1);
  EXPECT_EQ(base.rabit_debug, 1);
}
// Checks that Init reads the task id and `rabit_reduce_ring_mincount` from argv.
TEST(allreduce_base, init_with_ring_reduce)
{
  rabit::engine::AllreduceBase base;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_task_id = "rabit_task_id=1";
  std::string rabit_reduce_ring_mincount = "rabit_reduce_ring_mincount=1";
  char* argv[] = {&rabit_task_id[0], &rabit_reduce_ring_mincount[0]};

  base.Init(2, argv);
  EXPECT_EQ(base.task_id, "1");
  EXPECT_EQ(base.reduce_ring_mincount, 1);
}

View File

@@ -0,0 +1,66 @@
#define RABIT_CXXTESTDEFS_H
#include <gtest/gtest.h>
#include <string>
#include <iostream>
#include "../../src/allreduce_base.h"
// Checks that AllreduceBase::Init reads `rabit_task_id` from argv into task_id.
TEST(allreduce_base, init_task)
{
  rabit::engine::AllreduceBase base;

  // argv is built as char*[], so each argument needs writable storage.
  // A std::string buffer (contiguous and null-terminated since C++11)
  // replaces the previous variable-length array, which is a non-standard
  // compiler extension in C++.
  std::string rabit_task_id = "rabit_task_id=1";
  char* argv[] = {&rabit_task_id[0]};

  base.Init(1, argv);
  EXPECT_EQ(base.task_id, "1");
}
// Checks that Init reads the task id together with the
// `rabit_bootstrap_cache` and `rabit_debug` flags from argv.
TEST(allreduce_base, init_with_cache_on)
{
  rabit::engine::AllreduceBase base;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_task_id = "rabit_task_id=1";
  std::string rabit_bootstrap_cache = "rabit_bootstrap_cache=1";
  std::string rabit_debug = "rabit_debug=1";
  char* argv[] = {&rabit_task_id[0], &rabit_bootstrap_cache[0], &rabit_debug[0]};

  base.Init(3, argv);
  EXPECT_EQ(base.task_id, "1");
  EXPECT_EQ(base.rabit_bootstrap_cache, 1);
  EXPECT_EQ(base.rabit_debug, 1);
}
// Checks that Init reads the task id and `rabit_reduce_ring_mincount` from argv.
TEST(allreduce_base, init_with_ring_reduce)
{
  rabit::engine::AllreduceBase base;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_task_id = "rabit_task_id=1";
  std::string rabit_reduce_ring_mincount = "rabit_reduce_ring_mincount=1";
  char* argv[] = {&rabit_task_id[0], &rabit_reduce_ring_mincount[0]};

  base.Init(2, argv);
  EXPECT_EQ(base.task_id, "1");
  EXPECT_EQ(base.reduce_ring_mincount, 1);
}

View File

@@ -0,0 +1,36 @@
#define RABIT_CXXTESTDEFS_H
#include <gtest/gtest.h>
#include <string>
#include <iostream>
#include "../../src/allreduce_mock.h"
// Expects Allreduce on a mock engine configured with "mock=0,0,0,0" and
// rank 0 to throw dmlc::Error.
// NOTE(review): the mock spec fields presumably encode
// rank,version,seq,ndeath -- confirm against AllreduceMock.
TEST(allreduce_mock, mock_allreduce)
{
  rabit::engine::AllreduceMock m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces a non-standard variable-length array).
  std::string mock_str = "mock=0,0,0,0";
  char* argv[] = {&mock_str[0]};

  m.Init(1, argv);
  m.rank = 0;
  EXPECT_THROW(m.Allreduce(nullptr,0,0,nullptr,nullptr,nullptr), dmlc::Error);
}
// Expects Broadcast to throw dmlc::Error when the engine state
// (rank 0, version 1, sequence 2) lines up with the spec "mock=0,1,2,0".
// NOTE(review): the mock spec fields presumably encode
// rank,version,seq,ndeath -- confirm against AllreduceMock.
TEST(allreduce_mock, mock_broadcast)
{
  rabit::engine::AllreduceMock m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces a non-standard variable-length array).
  std::string mock_str = "mock=0,1,2,0";
  char* argv[] = {&mock_str[0]};

  m.Init(1, argv);
  m.rank = 0;
  m.version_number=1;
  m.seq_counter=2;
  EXPECT_THROW(m.Broadcast(nullptr,0,0), dmlc::Error);
}

View File

@@ -0,0 +1,52 @@
#define RABIT_CXXTESTDEFS_H
#include <gtest/gtest.h>
#include <string>
#include <iostream>
#include <dmlc/logging.h>
#include "../../src/allreduce_mock.h"
// Expects Allreduce on a mock engine configured with "mock=0,0,0,0" and
// rank 0 to throw dmlc::Error.
// NOTE(review): the mock spec fields presumably encode
// rank,version,seq,ndeath -- confirm against AllreduceMock.
TEST(allreduce_mock, mock_allreduce)
{
  rabit::engine::AllreduceMock m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces a non-standard variable-length array).
  std::string mock_str = "mock=0,0,0,0";
  char* argv[] = {&mock_str[0]};

  m.Init(1, argv);
  m.rank = 0;
  EXPECT_THROW({m.Allreduce(nullptr,0,0,nullptr,nullptr,nullptr);}, dmlc::Error);
}
// Expects Broadcast to throw dmlc::Error when the engine state
// (rank 0, version 1, sequence 2) lines up with the spec "mock=0,1,2,0".
// NOTE(review): the mock spec fields presumably encode
// rank,version,seq,ndeath -- confirm against AllreduceMock.
TEST(allreduce_mock, mock_broadcast)
{
  rabit::engine::AllreduceMock m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces a non-standard variable-length array).
  std::string mock_str = "mock=0,1,2,0";
  char* argv[] = {&mock_str[0]};

  m.Init(1, argv);
  m.rank = 0;
  m.version_number=1;
  m.seq_counter=2;
  EXPECT_THROW({m.Broadcast(nullptr,0,0);}, dmlc::Error);
}
// Expects Allgather to throw dmlc::Error when the engine state
// (rank 3, version 13, sequence 22) lines up with the spec "mock=3,13,22,0".
// NOTE(review): the mock spec fields presumably encode
// rank,version,seq,ndeath -- confirm against AllreduceMock.
TEST(allreduce_mock, mock_gather)
{
  rabit::engine::AllreduceMock m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces a non-standard variable-length array).
  std::string mock_str = "mock=3,13,22,0";
  char* argv[] = {&mock_str[0]};

  m.Init(1, argv);
  m.rank = 3;
  m.version_number=13;
  m.seq_counter=22;
  EXPECT_THROW({m.Allgather(nullptr,0,0,0,0);}, dmlc::Error);
}

View File

@@ -0,0 +1,233 @@
#define RABIT_CXXTESTDEFS_H
#include <gtest/gtest.h>
#include <chrono>
#include <string>
#include <iostream>
#include "../../src/allreduce_robust.h"
// Stand-in for the engine's error hook: instead of aborting, it verifies
// (case-insensitively) that the format string is the time-out message.
inline void mockerr(const char* fmt, ...) {
  EXPECT_STRCASEEQ(fmt, "[%d] exit due to time out %d s\n");
}

// Stand-in for the engine's assert hook: deliberately ignores every argument.
inline void mockassert(bool /*val*/, const char* /*fmt*/, ...) {
}

// Shared return codes handed to CheckAndRecover by the tests in this file.
rabit::engine::AllreduceRobust::ReturnType err_type(
    rabit::engine::AllreduceRobust::ReturnTypeEnum::kSockError);
rabit::engine::AllreduceRobust::ReturnType succ_type(
    rabit::engine::AllreduceRobust::ReturnTypeEnum::kSuccess);
// A socket error with the timeout enabled (1 s): CheckAndRecover is expected
// to return false and, after sleeping 1.5 s, the timeout task is expected to
// yield false. mockerr checks the message passed to the error hook.
TEST(allreduce_robust, sync_error_timeout)
{
  rabit::engine::AllreduceRobust m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_timeout = "rabit_timeout=1";
  std::string rabit_timeout_sec = "rabit_timeout_sec=1";
  char* argv[] = {&rabit_timeout[0], &rabit_timeout_sec[0]};

  m.Init(2, argv);
  m.rank = 0;
  m.rabit_bootstrap_cache = 1;
  m._error = mockerr;
  m._assert = mockassert;
  EXPECT_EQ(m.CheckAndRecover(err_type), false);
  std::this_thread::sleep_for(std::chrono::milliseconds(1500));
  EXPECT_EQ(m.rabit_timeout_task.get(), false);
}
// An error followed 100 ms later by a success: the second CheckAndRecover is
// expected to return true and the timeout task is expected to yield true.
TEST(allreduce_robust, sync_error_reset)
{
  rabit::engine::AllreduceRobust m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_timeout = "rabit_timeout=1";
  std::string rabit_timeout_sec = "rabit_timeout_sec=1";
  std::string rabit_debug = "rabit_debug=1";
  char* argv[] = {&rabit_timeout[0], &rabit_timeout_sec[0], &rabit_debug[0]};

  m.Init(3, argv);
  m.rank = 0;
  m._assert = mockassert;
  EXPECT_EQ(m.CheckAndRecover(err_type), false);
  std::this_thread::sleep_for(std::chrono::milliseconds(100));
  EXPECT_EQ(m.CheckAndRecover(succ_type), true);
  EXPECT_EQ(m.rabit_timeout_task.get(), true);
  m.Shutdown();
}
// A success followed by an error that is never cleared: after sleeping past
// the 1 s timeout, the timeout task is expected to yield false.
TEST(allreduce_robust, sync_success_error_timeout)
{
  rabit::engine::AllreduceRobust m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_timeout = "rabit_timeout=1";
  std::string rabit_timeout_sec = "rabit_timeout_sec=1";
  std::string rabit_debug = "rabit_debug=1";
  char* argv[] = {&rabit_timeout[0], &rabit_timeout_sec[0], &rabit_debug[0]};

  m.Init(3, argv);
  m.rank = 0;
  m.rabit_bootstrap_cache = 1;
  m._assert = mockassert;
  m._error = mockerr;
  EXPECT_EQ(m.CheckAndRecover(succ_type), true);
  std::this_thread::sleep_for(std::chrono::milliseconds(100));
  EXPECT_EQ(m.CheckAndRecover(err_type), false);
  std::this_thread::sleep_for(std::chrono::milliseconds(1500));
  EXPECT_EQ(m.rabit_timeout_task.get(), false);
}
// Success, error, then success again within 20 ms: after sleeping past the
// 1 s timeout window the timeout task is expected to yield true.
TEST(allreduce_robust, sync_success_error_success)
{
  rabit::engine::AllreduceRobust m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_timeout = "rabit_timeout=1";
  std::string rabit_timeout_sec = "rabit_timeout_sec=1";
  std::string rabit_debug = "rabit_debug=1";
  char* argv[] = {&rabit_timeout[0], &rabit_timeout_sec[0], &rabit_debug[0]};

  m.Init(3, argv);
  m.rank = 0;
  m.rabit_bootstrap_cache = 1;
  m._assert = mockassert;
  EXPECT_EQ(m.CheckAndRecover(succ_type), true);
  std::this_thread::sleep_for(std::chrono::milliseconds(10));
  EXPECT_EQ(m.CheckAndRecover(err_type), false);
  std::this_thread::sleep_for(std::chrono::milliseconds(10));
  EXPECT_EQ(m.CheckAndRecover(succ_type), true);
  std::this_thread::sleep_for(std::chrono::milliseconds(1100));
  EXPECT_EQ(m.rabit_timeout_task.get(), true);
  m.Shutdown();
}
// Two errors 1.1 s apart: the timeout task is expected to yield false, and
// the total wait is expected to stay under 2 s, i.e. the second error must
// not restart the timeout.
TEST(allreduce_robust, sync_error_no_reset_timeout)
{
  rabit::engine::AllreduceRobust m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_timeout = "rabit_timeout=1";
  std::string rabit_timeout_sec = "rabit_timeout_sec=1";
  std::string rabit_debug = "rabit_debug=1";
  char* argv[] = {&rabit_timeout[0], &rabit_timeout_sec[0], &rabit_debug[0]};

  m.Init(3, argv);
  m.rank = 0;
  m.rabit_bootstrap_cache = 1;
  m._assert = mockassert;
  m._error = mockerr;

  // Elapsed time is measured with the monotonic steady_clock; system_clock
  // can jump when the wall clock is adjusted and would skew the bound below.
  auto start = std::chrono::steady_clock::now();
  EXPECT_EQ(m.CheckAndRecover(err_type), false);
  std::this_thread::sleep_for(std::chrono::milliseconds(1100));
  EXPECT_EQ(m.CheckAndRecover(err_type), false);
  m.rabit_timeout_task.wait();
  auto end = std::chrono::steady_clock::now();
  std::chrono::duration<double> diff = end-start;
  EXPECT_EQ(m.rabit_timeout_task.get(), false);
  // expect second error don't overwrite/reset timeout task
  EXPECT_LT(diff.count(), 2);
}
// With only a successful CheckAndRecover call, Shutdown is invoked shortly
// afterwards; the test passes if that sequence completes.
TEST(allreduce_robust, no_timeout_shut_down)
{
  rabit::engine::AllreduceRobust m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_timeout = "rabit_timeout=1";
  std::string rabit_timeout_sec = "rabit_timeout_sec=1";
  std::string rabit_debug = "rabit_debug=1";
  char* argv[] = {&rabit_timeout[0], &rabit_timeout_sec[0], &rabit_debug[0]};

  m.Init(3, argv);
  m.rank = 0;
  EXPECT_EQ(m.CheckAndRecover(succ_type), true);
  std::this_thread::sleep_for(std::chrono::milliseconds(10));
  m.Shutdown();
}
// After an error is reported, Shutdown is invoked 10 ms later, well before
// the 1 s timeout would elapse; the test passes if that sequence completes.
TEST(allreduce_robust, shut_down_before_timeout)
{
  rabit::engine::AllreduceRobust m;

  // Writable, null-terminated argument storage backed by std::string
  // (replaces non-standard variable-length arrays).
  std::string rabit_timeout = "rabit_timeout=1";
  std::string rabit_timeout_sec = "rabit_timeout_sec=1";
  std::string rabit_debug = "rabit_debug=1";
  char* argv[] = {&rabit_timeout[0], &rabit_timeout_sec[0], &rabit_debug[0]};

  m.Init(3, argv);
  m.rank = 0;

  // Point err_link at a local record before reporting the error.
  rabit::engine::AllreduceRobust::LinkRecord a;
  m.err_link = &a;
  EXPECT_EQ(m.CheckAndRecover(err_type), false);
  std::this_thread::sleep_for(std::chrono::milliseconds(10));
  m.Shutdown();
}

18
rabit/test/cpp/test_io.cc Normal file
View File

@@ -0,0 +1,18 @@
/*!
* Copyright (c) 2019 by Contributors
*/
#include <gtest/gtest.h>
#include <rabit/internal/io.h>
#include <vector>
namespace rabit {
// Seeking to SeekEnd on a MemoryFixSizeBuffer is expected to leave Tell()
// equal to the size handed to the constructor.
TEST(MemoryFixSizeBuffer, Seek) {
  constexpr size_t kSize = 64;
  std::vector<int32_t> storage(kSize);

  utils::MemoryFixSizeBuffer stream(storage.data(), storage.size());
  stream.Seek(utils::MemoryFixSizeBuffer::SeekEnd);

  size_t position = stream.Tell();
  ASSERT_EQ(position, kSize);
}
}  // namespace rabit

View File

@@ -0,0 +1,8 @@
#include "gtest/gtest.h"
// Test-runner entry point: hands the command line to GoogleTest, selects the
// "threadsafe" death-test style, then runs every registered test.
int main(int argc, char** argv)
{
  ::testing::InitGoogleTest(&argc, argv);
  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
  const int status = RUN_ALL_TESTS();
  return status;
}

View File

@@ -0,0 +1,6 @@
#include <gtest/gtest.h>
#include <rabit/internal/utils.h>
// A failed rabit::utils::Assert is expected to surface as dmlc::Error.
TEST(Utils, Assert) {
  EXPECT_THROW(rabit::utils::Assert(false, "foo"), dmlc::Error);
}