Remove stop process. (#143)

This commit is contained in:
Jiaming Yuan 2020-08-06 01:12:00 +08:00 committed by GitHub
parent e6cd74ead3
commit 4acdd7c6f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 25 additions and 54 deletions

View File

@ -65,10 +65,6 @@ namespace utils {
/*! \brief error message buffer length */ /*! \brief error message buffer length */
const int kPrintBuffer = 1 << 12; const int kPrintBuffer = 1 << 12;
/*! \brief we may want to keep the process alive when there are multiple workers
* co-locate in the same process */
extern bool STOP_PROCESS_ON_ERROR;
/* \brief Case-insensitive string comparison */ /* \brief Case-insensitive string comparison */
inline int CompareStringsCaseInsensitive(const char* s1, const char* s2) { inline int CompareStringsCaseInsensitive(const char* s1, const char* s2) {
#ifdef _MSC_VER #ifdef _MSC_VER
@ -89,26 +85,17 @@ inline bool StringToBool(const char* s) {
* \param msg error message * \param msg error message
*/ */
inline void HandleAssertError(const char *msg) { inline void HandleAssertError(const char *msg) {
if (STOP_PROCESS_ON_ERROR) { fprintf(stderr,
fprintf(stderr, "AssertError:%s, shutting down process\n", msg); "AssertError:%s, rabit is configured to keep process running\n", msg);
exit(-1);
} else {
fprintf(stderr, "AssertError:%s, rabit is configured to keep process running\n", msg);
throw dmlc::Error(msg); throw dmlc::Error(msg);
}
} }
/*! /*!
* \brief handling of Check error, caused by inappropriate input * \brief handling of Check error, caused by inappropriate input
* \param msg error message * \param msg error message
*/ */
inline void HandleCheckError(const char *msg) { inline void HandleCheckError(const char *msg) {
if (STOP_PROCESS_ON_ERROR) {
fprintf(stderr, "%s, shutting down process\n", msg);
exit(-1);
} else {
fprintf(stderr, "%s, rabit is configured to keep process running\n", msg); fprintf(stderr, "%s, rabit is configured to keep process running\n", msg);
throw dmlc::Error(msg); throw dmlc::Error(msg);
}
} }
inline void HandlePrint(const char *msg) { inline void HandlePrint(const char *msg) {
printf("%s", msg); printf("%s", msg);

View File

@ -13,11 +13,6 @@
#include <map> #include <map>
namespace rabit { namespace rabit {
namespace utils {
bool STOP_PROCESS_ON_ERROR = true;
}
namespace engine { namespace engine {
// constructor // constructor
AllreduceBase::AllreduceBase(void) { AllreduceBase::AllreduceBase(void) {
@ -48,7 +43,6 @@ AllreduceBase::AllreduceBase(void) {
env_vars.push_back("DMLC_TRACKER_URI"); env_vars.push_back("DMLC_TRACKER_URI");
env_vars.push_back("DMLC_TRACKER_PORT"); env_vars.push_back("DMLC_TRACKER_PORT");
env_vars.push_back("DMLC_WORKER_CONNECT_RETRY"); env_vars.push_back("DMLC_WORKER_CONNECT_RETRY");
env_vars.push_back("DMLC_WORKER_STOP_PROCESS_ON_ERROR");
} }
// initialization function // initialization function
@ -200,15 +194,6 @@ void AllreduceBase::SetParam(const char *name, const char *val) {
if (!strcmp(name, "DMLC_WORKER_CONNECT_RETRY")) { if (!strcmp(name, "DMLC_WORKER_CONNECT_RETRY")) {
connect_retry = atoi(val); connect_retry = atoi(val);
} }
if (!strcmp(name, "DMLC_WORKER_STOP_PROCESS_ON_ERROR")) {
if (!strcmp(val, "true")) {
rabit::utils::STOP_PROCESS_ON_ERROR = true;
} else if (!strcmp(val, "false")) {
rabit::utils::STOP_PROCESS_ON_ERROR = false;
} else {
throw std::runtime_error("invalid value of DMLC_WORKER_STOP_PROCESS_ON_ERROR");
}
}
if (!strcmp(name, "rabit_bootstrap_cache")) { if (!strcmp(name, "rabit_bootstrap_cache")) {
rabit_bootstrap_cache = utils::StringToBool(val); rabit_bootstrap_cache = utils::StringToBool(val);
} }

View File

@ -550,8 +550,9 @@ void AllreduceRobust::CheckPoint_(const Serializable *global_model,
delta = utils::GetTime() - start; delta = utils::GetTime() - start;
// log checkpoint ack latency // log checkpoint ack latency
if (rabit_debug) { if (rabit_debug) {
utils::HandleLogInfo("[%d] checkpoint ack finished version %d, take %f seconds\n", utils::HandleLogInfo(
rank, version_number, delta); "[%d] checkpoint ack finished version %d, take %f seconds\n", rank,
version_number, delta);
} }
} }
/*! /*!

View File

@ -12,11 +12,6 @@
#include "rabit/internal/engine.h" #include "rabit/internal/engine.h"
namespace rabit { namespace rabit {
namespace utils {
bool STOP_PROCESS_ON_ERROR = true;
}
namespace engine { namespace engine {
/*! \brief EmptyEngine */ /*! \brief EmptyEngine */
class EmptyEngine : public IEngine { class EmptyEngine : public IEngine {

View File

@ -15,11 +15,6 @@
#include "rabit/internal/utils.h" #include "rabit/internal/utils.h"
namespace rabit { namespace rabit {
namespace utils {
bool STOP_PROCESS_ON_ERROR = true;
}
namespace engine { namespace engine {
/*! \brief implementation of engine using MPI */ /*! \brief implementation of engine using MPI */
class MPIEngine : public IEngine { class MPIEngine : public IEngine {

View File

@ -3,6 +3,7 @@ find_package(GTest REQUIRED)
add_executable( add_executable(
unit_tests unit_tests
test_io.cc test_io.cc
test_utils.cc
allreduce_robust_test.cc allreduce_robust_test.cc
allreduce_base_test.cc allreduce_base_test.cc
allreduce_mock_test.cc allreduce_mock_test.cc

View File

@ -17,7 +17,7 @@ TEST(allreduce_mock, mock_allreduce)
char* argv[] = {cmd}; char* argv[] = {cmd};
m.Init(1, argv); m.Init(1, argv);
m.rank = 0; m.rank = 0;
EXPECT_EXIT(m.Allreduce(nullptr,0,0,nullptr,nullptr,nullptr), ::testing::ExitedWithCode(255), ""); EXPECT_THROW(m.Allreduce(nullptr,0,0,nullptr,nullptr,nullptr), dmlc::Error);
} }
TEST(allreduce_mock, mock_broadcast) TEST(allreduce_mock, mock_broadcast)
@ -32,5 +32,5 @@ TEST(allreduce_mock, mock_broadcast)
m.rank = 0; m.rank = 0;
m.version_number=1; m.version_number=1;
m.seq_counter=2; m.seq_counter=2;
EXPECT_EXIT(m.Broadcast(nullptr,0,0), ::testing::ExitedWithCode(255), ""); EXPECT_THROW(m.Broadcast(nullptr,0,0), dmlc::Error);
} }

View File

@ -3,6 +3,7 @@
#include <string> #include <string>
#include <iostream> #include <iostream>
#include <dmlc/logging.h>
#include "../../src/allreduce_mock.h" #include "../../src/allreduce_mock.h"
TEST(allreduce_mock, mock_allreduce) TEST(allreduce_mock, mock_allreduce)
@ -17,7 +18,7 @@ TEST(allreduce_mock, mock_allreduce)
char* argv[] = {cmd}; char* argv[] = {cmd};
m.Init(1, argv); m.Init(1, argv);
m.rank = 0; m.rank = 0;
EXPECT_EXIT(m.Allreduce(nullptr,0,0,nullptr,nullptr,nullptr), ::testing::ExitedWithCode(255), ""); EXPECT_THROW({m.Allreduce(nullptr,0,0,nullptr,nullptr,nullptr);}, dmlc::Error);
} }
TEST(allreduce_mock, mock_broadcast) TEST(allreduce_mock, mock_broadcast)
@ -32,7 +33,7 @@ TEST(allreduce_mock, mock_broadcast)
m.rank = 0; m.rank = 0;
m.version_number=1; m.version_number=1;
m.seq_counter=2; m.seq_counter=2;
EXPECT_EXIT(m.Broadcast(nullptr,0,0), ::testing::ExitedWithCode(255), ""); EXPECT_THROW({m.Broadcast(nullptr,0,0);}, dmlc::Error);
} }
TEST(allreduce_mock, mock_gather) TEST(allreduce_mock, mock_gather)
@ -47,5 +48,5 @@ TEST(allreduce_mock, mock_gather)
m.rank = 3; m.rank = 3;
m.version_number=13; m.version_number=13;
m.seq_counter=22; m.seq_counter=22;
EXPECT_EXIT(m.Allgather(nullptr,0,0,0,0), ::testing::ExitedWithCode(255), ""); EXPECT_THROW({m.Allgather(nullptr,0,0,0,0);}, dmlc::Error);
} }

6
test/cpp/test_utils.cc Normal file
View File

@ -0,0 +1,6 @@
#include <gtest/gtest.h>
#include <rabit/internal/utils.h>
TEST(Utils, Assert) {
EXPECT_THROW({rabit::utils::Assert(false, "foo");}, dmlc::Error);
}