remove is_bootstrap parameter (#102)
* apply openmp simd * clean __builtin detection, moving windows build check from xgboost project, add openmp support for vectorize reduce * apply openmp only to rabit * organize rabit signature * remove is_bootstrap, use load_checkpoint as implicit flag * Visual Studio doesn't support latest openmp * organize omp declarations * replace memory copy with vector cast * Revert "replace memory copy with vector cast" This reverts commit 28de4792dcdff40d83d458510d23b7ef0b191d79. * Revert "orgnize omp declarations" This reverts commit 31341233d31ce93ccf34d700262b1f3f6690bbfe. * remove openmp settings, merge into an upcoming pr * mis * per feedback, update comments
This commit is contained in:
@@ -83,7 +83,6 @@ class AllreduceBase : public IEngine {
|
||||
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument to be passed into the lazy preprocessing function
|
||||
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
* \param _line caller line number used to generate unique cache key
|
||||
* \param _caller caller function name used to generate unique cache key
|
||||
@@ -94,7 +93,6 @@ class AllreduceBase : public IEngine {
|
||||
ReduceFunction reducer,
|
||||
PreprocFunction prepare_fun = NULL,
|
||||
void *prepare_arg = NULL,
|
||||
bool is_bootstrap = false,
|
||||
const char* _file = _FILE,
|
||||
const int _line = _LINE,
|
||||
const char* _caller = _CALLER) {
|
||||
@@ -109,14 +107,12 @@ class AllreduceBase : public IEngine {
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param total_size the size of the data to be broadcast
|
||||
* \param root the root worker id to broadcast the data
|
||||
* \param is_bootstrap if this broadcast is needed to bootstrap failed node
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
* \param _line caller line number used to generate unique cache key
|
||||
* \param _caller caller function name used to generate unique cache key
|
||||
*/
|
||||
virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root,
|
||||
bool is_bootstrap = false, const char* _file = _FILE,
|
||||
const int _line = _LINE, const char* _caller = _CALLER) {
|
||||
const char* _file = _FILE, const int _line = _LINE, const char* _caller = _CALLER) {
|
||||
if (world_size == 1 || world_size == -1) return;
|
||||
utils::Assert(TryBroadcast(sendrecvbuf_, total_size, root) == kSuccess,
|
||||
"Broadcast failed");
|
||||
|
||||
@@ -30,6 +30,7 @@ AllreduceRobust::AllreduceRobust(void) {
|
||||
global_lazycheck = NULL;
|
||||
use_local_model = -1;
|
||||
recover_counter = 0;
|
||||
checkpoint_loaded = false;
|
||||
env_vars.push_back("rabit_global_replica");
|
||||
env_vars.push_back("rabit_local_replica");
|
||||
}
|
||||
@@ -38,6 +39,7 @@ bool AllreduceRobust::Init(int argc, char* argv[]) {
|
||||
// chenqin: alert the user that they have opted in to an experimental feature.
|
||||
if (rabit_bootstrap_cache) utils::HandleLogInfo(
|
||||
"[EXPERIMENTAL] rabit bootstrap cache has been enabled\n");
|
||||
checkpoint_loaded = false;
|
||||
if (num_global_replica == 0) {
|
||||
result_buffer_round = -1;
|
||||
} else {
|
||||
@@ -157,7 +159,6 @@ int AllreduceRobust::GetBootstrapCache(const std::string &key, void* buf,
|
||||
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument to be passed into the lazy preprocessing function
|
||||
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
* \param _line caller line number used to generate unique cache key
|
||||
* \param _caller caller function name used to generate unique cache key
|
||||
@@ -168,7 +169,6 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_,
|
||||
ReduceFunction reducer,
|
||||
PreprocFunction prepare_fun,
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
@@ -183,7 +183,7 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_,
|
||||
+ std::string(_caller) + "#" +std::to_string(type_nbytes) + "x" + std::to_string(count);
|
||||
|
||||
// try to fetch bootstrap allreduce results from cache
|
||||
if (is_bootstrap && rabit_bootstrap_cache &&
|
||||
if (!checkpoint_loaded && rabit_bootstrap_cache &&
|
||||
GetBootstrapCache(key, sendrecvbuf_, type_nbytes, count, true) != -1) return;
|
||||
|
||||
double start = utils::GetTime();
|
||||
@@ -217,7 +217,7 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_,
|
||||
}
|
||||
|
||||
// if bootstrap allreduce, store and fetch through cache
|
||||
if (!is_bootstrap || !rabit_bootstrap_cache) {
|
||||
if (checkpoint_loaded || !rabit_bootstrap_cache) {
|
||||
resbuf.PushTemp(seq_counter, type_nbytes, count);
|
||||
seq_counter += 1;
|
||||
} else {
|
||||
@@ -229,13 +229,11 @@ void AllreduceRobust::Allreduce(void *sendrecvbuf_,
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param total_size the size of the data to be broadcast
|
||||
* \param root the root worker id to broadcast the data
|
||||
* \param is_bootstrap if this broadcast is needed to bootstrap failed node
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
* \param _line caller line number used to generate unique cache key
|
||||
* \param _caller caller function name used to generate unique cache key
|
||||
*/
|
||||
void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
@@ -245,7 +243,7 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root,
|
||||
std::string key = std::string(_file) + "::" + std::to_string(_line) + "::"
|
||||
+ std::string(_caller) + "#" +std::to_string(total_size) + "@" + std::to_string(root);
|
||||
// try to fetch bootstrap broadcast results from cache
|
||||
if (is_bootstrap && rabit_bootstrap_cache &&
|
||||
if (!checkpoint_loaded && rabit_bootstrap_cache &&
|
||||
GetBootstrapCache(key, sendrecvbuf_, total_size, 1, true) != -1) return;
|
||||
|
||||
double start = utils::GetTime();
|
||||
@@ -277,7 +275,7 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root,
|
||||
rank, key.c_str(), root, version_number, seq_counter, delta);
|
||||
}
|
||||
// if bootstrap broadcast, store and fetch through cache
|
||||
if (!is_bootstrap || !rabit_bootstrap_cache) {
|
||||
if (checkpoint_loaded || !rabit_bootstrap_cache) {
|
||||
resbuf.PushTemp(seq_counter, 1, total_size);
|
||||
seq_counter += 1;
|
||||
} else {
|
||||
@@ -308,6 +306,7 @@ void AllreduceRobust::Broadcast(void *sendrecvbuf_, size_t total_size, int root,
|
||||
*/
|
||||
int AllreduceRobust::LoadCheckPoint(Serializable *global_model,
|
||||
Serializable *local_model) {
|
||||
checkpoint_loaded = true;
|
||||
// skip action in single node
|
||||
if (world_size == 1) return 0;
|
||||
this->LocalModelCheck(local_model != NULL);
|
||||
|
||||
@@ -62,7 +62,6 @@ class AllreduceRobust : public AllreduceBase {
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument to be passed into the lazy preprocessing function
|
||||
* \param prepare_arg argument to be passed into the lazy preprocessing function
|
||||
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
* \param _line caller line number used to generate unique cache key
|
||||
* \param _caller caller function name used to generate unique cache key
|
||||
@@ -73,7 +72,6 @@ class AllreduceRobust : public AllreduceBase {
|
||||
ReduceFunction reducer,
|
||||
PreprocFunction prepare_fun = NULL,
|
||||
void *prepare_arg = NULL,
|
||||
bool is_bootstrap = false,
|
||||
const char* _file = _FILE,
|
||||
const int _line = _LINE,
|
||||
const char* _caller = _CALLER);
|
||||
@@ -82,13 +80,11 @@ class AllreduceRobust : public AllreduceBase {
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param total_size the size of the data to be broadcast
|
||||
* \param root the root worker id to broadcast the data
|
||||
* \param is_bootstrap if this broadcast is needed to bootstrap failed node
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
* \param _line caller line number used to generate unique cache key
|
||||
* \param _caller caller function name used to generate unique cache key
|
||||
*/
|
||||
virtual void Broadcast(void *sendrecvbuf_, size_t total_size, int root,
|
||||
bool is_bootstrap = false,
|
||||
const char* _file = _FILE,
|
||||
const int _line = _LINE,
|
||||
const char* _caller = _CALLER);
|
||||
@@ -643,6 +639,8 @@ o * the input state must exactly one saved state(local state of current node)
|
||||
std::string local_chkpt[2];
|
||||
// version of local checkpoint can be 1 or 0
|
||||
int local_chkpt_version;
|
||||
// whether a checkpoint has been loaded; used to distinguish bootstrap cache results from seqno cache results
|
||||
bool checkpoint_loaded;
|
||||
};
|
||||
} // namespace engine
|
||||
} // namespace rabit
|
||||
|
||||
@@ -93,12 +93,11 @@ void Allreduce_(void *sendrecvbuf,
|
||||
mpi::OpType op,
|
||||
IEngine::PreprocFunction prepare_fun,
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count, red, prepare_fun,
|
||||
prepare_arg, is_bootstrap, _file, _line, _caller);
|
||||
prepare_arg, _file, _line, _caller);
|
||||
}
|
||||
|
||||
// code for reduce handle
|
||||
@@ -121,14 +120,13 @@ void ReduceHandle::Allreduce(void *sendrecvbuf,
|
||||
size_t type_nbytes, size_t count,
|
||||
IEngine::PreprocFunction prepare_fun,
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
utils::Assert(redfunc_ != NULL, "must intialize handle to call AllReduce");
|
||||
GetEngine()->Allreduce(sendrecvbuf, type_nbytes, count,
|
||||
redfunc_, prepare_fun, prepare_arg,
|
||||
is_bootstrap, _file, _line, _caller);
|
||||
_file, _line, _caller);
|
||||
}
|
||||
} // namespace engine
|
||||
} // namespace rabit
|
||||
|
||||
@@ -31,7 +31,6 @@ class EmptyEngine : public IEngine {
|
||||
ReduceFunction reducer,
|
||||
PreprocFunction prepare_fun,
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
@@ -39,8 +38,7 @@ class EmptyEngine : public IEngine {
|
||||
"use Allreduce_ instead");
|
||||
}
|
||||
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root,
|
||||
bool is_bootstrap, const char* _file,
|
||||
const int _line, const char* _caller) {
|
||||
const char* _file, const int _line, const char* _caller) {
|
||||
}
|
||||
virtual void InitAfterException(void) {
|
||||
utils::Error("EmptyEngine is not fault tolerant");
|
||||
@@ -109,7 +107,6 @@ void Allreduce_(void *sendrecvbuf,
|
||||
mpi::OpType op,
|
||||
IEngine::PreprocFunction prepare_fun,
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
@@ -129,7 +126,6 @@ void ReduceHandle::Allreduce(void *sendrecvbuf,
|
||||
size_t type_nbytes, size_t count,
|
||||
IEngine::PreprocFunction prepare_fun,
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
|
||||
@@ -33,7 +33,6 @@ class MPIEngine : public IEngine {
|
||||
ReduceFunction reducer,
|
||||
PreprocFunction prepare_fun,
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
@@ -41,7 +40,7 @@ class MPIEngine : public IEngine {
|
||||
"use Allreduce_ instead");
|
||||
}
|
||||
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root,
|
||||
bool is_bootstrap, const char* _file, const int _line,
|
||||
const char* _file, const int _line,
|
||||
const char* _caller) {
|
||||
MPI::COMM_WORLD.Bcast(sendrecvbuf_, size, MPI::CHAR, root);
|
||||
}
|
||||
@@ -160,7 +159,6 @@ void Allreduce_(void *sendrecvbuf,
|
||||
mpi::OpType op,
|
||||
IEngine::PreprocFunction prepare_fun,
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
@@ -212,7 +210,6 @@ void ReduceHandle::Allreduce(void *sendrecvbuf,
|
||||
size_t type_nbytes, size_t count,
|
||||
IEngine::PreprocFunction prepare_fun,
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
|
||||
Reference in New Issue
Block a user