Allow not stopping the process on error (#97)

* Allow not stopping the process on error

* fix merge error
This commit is contained in:
Nan Zhu
2019-06-25 13:04:39 -07:00
committed by GitHub
parent a429748e24
commit fc85f776f4
4 changed files with 44 additions and 4 deletions

View File

@@ -14,6 +14,11 @@
#include "./allreduce_base.h"
namespace rabit {
namespace utils {
/*!
 * \brief global switch configured through the DMLC_WORKER_STOP_PROCESS_ON_ERROR
 *  parameter in AllreduceBase::SetParam ("true" / "false"); enabled by default.
 *  NOTE(review): presumably read by the error helpers to decide whether a
 *  failure terminates the process -- confirm against utils.h.
 */
bool STOP_PROCESS_ON_ERROR{true};
}  // namespace utils
namespace engine {
// constructor
AllreduceBase::AllreduceBase(void) {
@@ -48,6 +53,7 @@ AllreduceBase::AllreduceBase(void) {
env_vars.push_back("DMLC_TRACKER_URI");
env_vars.push_back("DMLC_TRACKER_PORT");
env_vars.push_back("DMLC_WORKER_CONNECT_RETRY");
env_vars.push_back("DMLC_WORKER_STOP_PROCESS_ON_ERROR");
}
// initialization function
@@ -190,6 +196,15 @@ void AllreduceBase::SetParam(const char *name, const char *val) {
if (!strcmp(name, "DMLC_WORKER_CONNECT_RETRY")) {
connect_retry = atoi(val);
}
if (!strcmp(name, "DMLC_WORKER_STOP_PROCESS_ON_ERROR")) {
if (!strcmp(val, "true")) {
rabit::utils::STOP_PROCESS_ON_ERROR = true;
} else if (!strcmp(val, "false")) {
rabit::utils::STOP_PROCESS_ON_ERROR = false;
} else {
throw std::runtime_error("invalid value of DMLC_WORKER_STOP_PROCESS_ON_ERROR");
}
}
}
/*!
* \brief initialize connection to the tracker

View File

@@ -13,6 +13,11 @@
#include "../include/rabit/internal/engine.h"
namespace rabit {
namespace utils {
/*!
 * \brief definition of the stop-on-error switch for the translation unit that
 *  holds EmptyEngine; starts out as true.
 *  NOTE(review): no code visible here changes it, and its consumers are not
 *  shown -- presumably the error helpers in utils.h; confirm.
 */
bool STOP_PROCESS_ON_ERROR{true};
}  // namespace utils
namespace engine {
/*! \brief EmptyEngine */
class EmptyEngine : public IEngine {

View File

@@ -15,6 +15,11 @@
#include "../include/rabit/internal/utils.h"
namespace rabit {
namespace utils {
/*!
 * \brief definition of the stop-on-error switch for the translation unit that
 *  holds MPIEngine; starts out as true.
 *  NOTE(review): no code visible here changes it, and its consumers are not
 *  shown -- presumably the error helpers in utils.h; confirm.
 */
bool STOP_PROCESS_ON_ERROR{true};
}  // namespace utils
namespace engine {
/*! \brief implementation of engine using MPI */
class MPIEngine : public IEngine {