remove is_bootstrap parameter (#102)

* apply openmp simd

* clean up __builtin detection, move Windows build check from the xgboost project, add OpenMP support for vectorized reduce

* apply openmp only to rabit

* organize rabit signature

* remove is_bootstrap, use load_checkpoint as implicit flag

* Visual Studio doesn't support the latest OpenMP

* organize omp declarations

* replace memory copy with vector cast

* Revert "replace memory copy with vector cast"

This reverts commit 28de4792dcdff40d83d458510d23b7ef0b191d79.

* Revert "organize omp declarations"

This reverts commit 31341233d31ce93ccf34d700262b1f3f6690bbfe.

* remove OpenMP settings; they will be merged in an upcoming PR

* mis

* per feedback, update comments
This commit is contained in:
Chen Qin
2019-09-10 11:45:50 -07:00
committed by Nan Zhu
parent 5797dcb64e
commit 9a7ac85d7e
13 changed files with 56 additions and 137 deletions

View File

@@ -83,7 +83,6 @@ class AllreduceBase : public IEngine {
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
* \param prepare_arg argument to be passed into the lazy preprocessing function
* \param is_bootstrap if this allreduce is needed to bootstrap filed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
@@ -94,7 +93,6 @@ class AllreduceBase : public IEngine {
ReduceFunction reducer,
PreprocFunction prepare_fun = NULL,
void *prepare_arg = NULL,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER) {
@@ -109,14 +107,12 @@ class AllreduceBase : public IEngine {
* \param sendrecvbuf_ buffer for both sending and recving data
* \param size the size of the data to be broadcasted
* \param root the root worker id to broadcast the data
* \param is_bootstrap if this broadcast is needed to bootstrap filed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
*/
virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root,
bool is_bootstrap = false, const char* _file = _FILE,
const int _line = _LINE, const char* _caller = _CALLER) {
const char* _file = _FILE, const int _line = _LINE, const char* _caller = _CALLER) {
if (world_size == 1 || world_size == -1) return;
utils::Assert(TryBroadcast(sendrecvbuf_, total_size, root) == kSuccess,
"Broadcast failed");

View File

@@ -30,6 +30,7 @@ AllreduceRobust::AllreduceRobust(void) {
global_lazycheck = NULL;
use_local_model = -1;
recover_counter = 0;
checkpoint_loaded = false;
env_vars.push_back("rabit_global_replica");
env_vars.push_back("rabit_local_replica");
}
@@ -38,6 +39,7 @@ bool AllreduceRobust::Init(int argc, char* argv[]) {
// chenqin: alert user opted in experimental feature.
if (rabit_bootstrap_cache) utils::HandleLogInfo(
"[EXPERIMENTAL] rabit bootstrap cache has been enabled\n");
checkpoint_loaded = false;
if (num_global_replica == 0) {
result_buffer_round = -1;
} else {
@@ -157,7 +159,6 @@ int AllreduceRobust::GetBootstrapCache(const std::string &key, void* buf,
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
* \param prepare_arg argument to be passed into the lazy preprocessing function
* \param is_bootstrap if this allreduce is needed to bootstrap filed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
@@ -168,7 +169,6 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_,
ReduceFunction reducer,
PreprocFunction prepare_fun,
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
@@ -183,7 +183,7 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_,
+ std::string(_caller) + "#" +std::to_string(type_nbytes) + "x" + std::to_string(count);
// try fetch bootstrap allreduce results from cache
if (is_bootstrap && rabit_bootstrap_cache &&
if (!checkpoint_loaded && rabit_bootstrap_cache &&
GetBootstrapCache(key, sendrecvbuf_, type_nbytes, count, true) != -1) return;
double start = utils::GetTime();
@@ -217,7 +217,7 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_,
}
// if bootstrap allreduce, store and fetch through cache
if (!is_bootstrap || !rabit_bootstrap_cache) {
if (checkpoint_loaded || !rabit_bootstrap_cache) {
resbuf.PushTemp(seq_counter, type_nbytes, count);
seq_counter += 1;
} else {
@@ -229,13 +229,11 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_,
* \param sendrecvbuf_ buffer for both sending and recving data
* \param size the size of the data to be broadcasted
* \param root the root worker id to broadcast the data
* \param is_bootstrap if this allreduce is needed to bootstrap filed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
*/
void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
@@ -245,7 +243,7 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root,
std::string key = std::string(_file) + "::" + std::to_string(_line) + "::"
+ std::string(_caller) + "#" +std::to_string(total_size) + "@" + std::to_string(root);
// try fetch bootstrap allreduce results from cache
if (is_bootstrap && rabit_bootstrap_cache &&
if (!checkpoint_loaded && rabit_bootstrap_cache &&
GetBootstrapCache(key, sendrecvbuf_, total_size, 1, true) != -1) return;
double start = utils::GetTime();
@@ -277,7 +275,7 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root,
rank, key.c_str(), root, version_number, seq_counter, delta);
}
// if bootstrap broadcast, store and fetch through cache
if (!is_bootstrap || !rabit_bootstrap_cache) {
if (checkpoint_loaded || !rabit_bootstrap_cache) {
resbuf.PushTemp(seq_counter, 1, total_size);
seq_counter += 1;
} else {
@@ -308,6 +306,7 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root,
*/
int AllreduceRobust::LoadCheckPoint(Serializable *global_model,
Serializable *local_model) {
checkpoint_loaded = true;
// skip action in single node
if (world_size == 1) return 0;
this->LocalModelCheck(local_model != NULL);

View File

@@ -62,7 +62,6 @@ class AllreduceRobust : public AllreduceBase {
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
* \param prepare_arg argument used to passed into the lazy preprocessing function
* \param prepare_arg argument used to passed into the lazy preprocessing function
* \param is_bootstrap if this allreduce is needed to bootstrap filed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
@@ -73,7 +72,6 @@ class AllreduceRobust : public AllreduceBase {
ReduceFunction reducer,
PreprocFunction prepare_fun = NULL,
void *prepare_arg = NULL,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER);
@@ -82,13 +80,11 @@ class AllreduceRobust : public AllreduceBase {
* \param sendrecvbuf_ buffer for both sending and recving data
* \param size the size of the data to be broadcasted
* \param root the root worker id to broadcast the data
* \param is_bootstrap if this broadcast is needed to bootstrap filed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
*/
virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER);
@@ -643,6 +639,8 @@ o * the input state must exactly one saved state(local state of current node)
std::string local_chkpt[2];
// version of local checkpoint can be 1 or 0
int local_chkpt_version;
// whether a checkpoint has been loaded; used to distinguish the bootstrap result cache from the seqno cache
bool checkpoint_loaded;
};
} // namespace engine
} // namespace rabit

View File

@@ -93,12 +93,11 @@ void Allreduce_(void *sendrecvbuf,
mpi::OpType op,
IEngine::PreprocFunction prepare_fun,
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun,
prepare_arg, is_bootstrap, _file, _line, _caller);
prepare_arg, _file, _line, _caller);
}
// code for reduce handle
@@ -121,14 +120,13 @@ void ReduceHandle::Allreduce(void *sendrecvbuf,
size_t type_nbytes, size_t count,
IEngine::PreprocFunction prepare_fun,
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
utils::Assert(redfunc_ != NULL, "must intialize handle to call AllReduce");
GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count,
redfunc_, prepare_fun, prepare_arg,
is_bootstrap, _file, _line, _caller);
_file, _line, _caller);
}
} // namespace engine
} // namespace rabit

View File

@@ -31,7 +31,6 @@ class EmptyEngine : public IEngine {
ReduceFunction reducer,
PreprocFunction prepare_fun,
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
@@ -39,8 +38,7 @@ class EmptyEngine : public IEngine {
"use Allreduce_ instead");
}
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root,
bool is_bootstrap, const char* _file,
const int _line, const char* _caller) {
const char* _file, const int _line, const char* _caller) {
}
virtual void InitAfterException(void) {
utils::Error("EmptyEngine is not fault tolerant");
@@ -109,7 +107,6 @@ void Allreduce_(void *sendrecvbuf,
mpi::OpType op,
IEngine::PreprocFunction prepare_fun,
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
@@ -129,7 +126,6 @@ void ReduceHandle::Allreduce(void *sendrecvbuf,
size_t type_nbytes, size_t count,
IEngine::PreprocFunction prepare_fun,
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {

View File

@@ -33,7 +33,6 @@ class MPIEngine : public IEngine {
ReduceFunction reducer,
PreprocFunction prepare_fun,
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
@@ -41,7 +40,7 @@ class MPIEngine : public IEngine {
"use Allreduce_ instead");
}
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root,
bool is_bootstrap, const char* _file, const int _line,
const char* _file, const int _line,
const char* _caller) {
MPI::COMM_WORLD.Bcast(sendrecvbuf_, size, MPI::CHAR, root);
}
@@ -160,7 +159,6 @@ void Allreduce_(void *sendrecvbuf,
mpi::OpType op,
IEngine::PreprocFunction prepare_fun,
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
@@ -212,7 +210,6 @@ void ReduceHandle::Allreduce(void *sendrecvbuf,
size_t type_nbytes, size_t count,
IEngine::PreprocFunction prepare_fun,
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {