remove is_bootstrap parameter (#102)
* apply openmp simd * clean __builtin detection, moving windows build check from xgboost project, add openmp support for vectorize reduce * apply openmp only to rabit * organize rabit signature * remove is_bootstrap, use load_checkpoint as implicit flag * visual studio doesn't support latest openmp * organize omp declarations * replace memory copy with vector cast * Revert "replace memory copy with vector cast" This reverts commit 28de4792dcdff40d83d458510d23b7ef0b191d79. * Revert "orgnize omp declarations" This reverts commit 31341233d31ce93ccf34d700262b1f3f6690bbfe. * remove openmp settings, merge into an upcoming PR * mis * per feedback, update comments
This commit is contained in:
@@ -9,39 +9,15 @@
|
||||
#include <string>
|
||||
#include "../serializable.h"
|
||||
|
||||
// keeps rabit api caller signature
|
||||
#ifndef RABIT_API_CALLER_SIGNATURE
|
||||
#define RABIT_API_CALLER_SIGNATURE
|
||||
|
||||
#ifdef __has_builtin
|
||||
|
||||
#if __has_builtin(__builtin_FILE)
|
||||
#if (defined(__GNUC__) && !defined(__clang__))
|
||||
#define _FILE __builtin_FILE()
|
||||
#else
|
||||
#define _FILE "N/A"
|
||||
#endif // __has_builtin(__builtin_FILE)
|
||||
|
||||
#if __has_builtin(__builtin_LINE)
|
||||
#define _LINE __builtin_LINE()
|
||||
#else
|
||||
#define _LINE -1
|
||||
#endif // __has_builtin(__builtin_LINE)
|
||||
|
||||
#if __has_builtin(__builtin_FUNCTION)
|
||||
#define _CALLER __builtin_FUNCTION()
|
||||
#else
|
||||
#define _CALLER "N/A"
|
||||
#endif // __has_builtin(__builtin_FUNCTION)
|
||||
|
||||
#else
|
||||
|
||||
#define _FILE "N/A"
|
||||
#define _LINE -1
|
||||
#define _CALLER "N/A"
|
||||
|
||||
#endif // __has_builtin
|
||||
|
||||
#endif // RABIT_API_CALLER_SIGNATURE
|
||||
#endif // (defined(__GNUC__) && !defined(__clang__))
|
||||
|
||||
namespace MPI {
|
||||
/*! \brief MPI data type just to be compatible with MPI reduce function*/
|
||||
@@ -88,7 +64,6 @@ class IEngine {
|
||||
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument used to pass into the lazy preprocessing function
|
||||
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
* \param _line caller line number used to generate unique cache key
|
||||
* \param _caller caller function name used to generate unique cache key
|
||||
@@ -99,7 +74,6 @@ class IEngine {
|
||||
ReduceFunction reducer,
|
||||
PreprocFunction prepare_fun = NULL,
|
||||
void *prepare_arg = NULL,
|
||||
bool is_bootstrap = false,
|
||||
const char* _file = _FILE,
|
||||
const int _line = _LINE,
|
||||
const char* _caller = _CALLER) = 0;
|
||||
@@ -108,13 +82,11 @@ class IEngine {
|
||||
* \param sendrecvbuf_ buffer for both sending and receiving data
|
||||
* \param size the size of the data to be broadcasted
|
||||
* \param root the root worker id to broadcast the data
|
||||
* \param is_bootstrap if this broadcast is needed to bootstrap failed node
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
* \param _line caller line number used to generate unique cache key
|
||||
* \param _caller caller function name used to generate unique cache key
|
||||
*/
|
||||
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root,
|
||||
bool is_bootstrap = false,
|
||||
const char* _file = _FILE,
|
||||
const int _line = _LINE,
|
||||
const char* _caller = _CALLER) = 0;
|
||||
@@ -254,7 +226,6 @@ enum DataType {
|
||||
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument used to pass into the lazy preprocessing function.
|
||||
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
* \param _line caller line number used to generate unique cache key
|
||||
* \param _caller caller function name used to generate unique cache key
|
||||
@@ -267,7 +238,6 @@ void Allreduce_(void *sendrecvbuf,
|
||||
mpi::OpType op,
|
||||
IEngine::PreprocFunction prepare_fun = NULL,
|
||||
void *prepare_arg = NULL,
|
||||
bool is_bootstrap = false,
|
||||
const char* _file = _FILE,
|
||||
const int _line = _LINE,
|
||||
const char* _caller = _CALLER);
|
||||
@@ -296,7 +266,6 @@ class ReduceHandle {
|
||||
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf_.
|
||||
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
|
||||
* \param prepare_arg argument used to pass into the lazy preprocessing function
|
||||
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
|
||||
* \param _file caller file name used to generate unique cache key
|
||||
* \param _line caller line number used to generate unique cache key
|
||||
* \param _caller caller function name used to generate unique cache key
|
||||
@@ -306,7 +275,6 @@ class ReduceHandle {
|
||||
size_t count,
|
||||
IEngine::PreprocFunction prepare_fun = NULL,
|
||||
void *prepare_arg = NULL,
|
||||
bool is_bootstrap = false,
|
||||
const char* _file = _FILE,
|
||||
const int _line = _LINE,
|
||||
const char* _caller = _CALLER);
|
||||
|
||||
@@ -94,10 +94,9 @@ struct BitOR {
|
||||
};
|
||||
template<typename OP, typename DType>
|
||||
inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) {
|
||||
const DType *src = (const DType*)src_;
|
||||
DType *dst = (DType*)dst_; // NOLINT(*)
|
||||
|
||||
for (int i = 0; i < len; ++i) {
|
||||
const DType* src = (const DType*)src_;
|
||||
DType* dst = (DType*)dst_; // NOLINT(*)
|
||||
for (int i = 0; i < len; i++) {
|
||||
OP::Reduce(dst[i], src[i]);
|
||||
}
|
||||
}
|
||||
@@ -129,42 +128,39 @@ inline std::string GetProcessorName(void) {
|
||||
}
|
||||
// broadcast data to all other nodes from root
|
||||
inline void Broadcast(void *sendrecv_data, size_t size, int root,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
engine::GetEngine()->Broadcast(sendrecv_data, size, root,
|
||||
is_bootstrap, _file, _line, _caller);
|
||||
_file, _line, _caller);
|
||||
}
|
||||
template<typename DType>
|
||||
inline void Broadcast(std::vector<DType> *sendrecv_data, int root,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
size_t size = sendrecv_data->size();
|
||||
Broadcast(&size, sizeof(size), root, is_bootstrap, _file, _line, _caller);
|
||||
Broadcast(&size, sizeof(size), root, _file, _line, _caller);
|
||||
if (sendrecv_data->size() != size) {
|
||||
sendrecv_data->resize(size);
|
||||
}
|
||||
if (size != 0) {
|
||||
Broadcast(&(*sendrecv_data)[0], size * sizeof(DType), root,
|
||||
is_bootstrap, _file, _line, _caller);
|
||||
_file, _line, _caller);
|
||||
}
|
||||
}
|
||||
inline void Broadcast(std::string *sendrecv_data, int root,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
size_t size = sendrecv_data->length();
|
||||
Broadcast(&size, sizeof(size), root, is_bootstrap, _file, _line, _caller);
|
||||
Broadcast(&size, sizeof(size), root, _file, _line, _caller);
|
||||
if (sendrecv_data->length() != size) {
|
||||
sendrecv_data->resize(size);
|
||||
}
|
||||
if (size != 0) {
|
||||
Broadcast(&(*sendrecv_data)[0], size * sizeof(char), root,
|
||||
is_bootstrap, _file, _line, _caller);
|
||||
_file, _line, _caller);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,13 +169,12 @@ template<typename OP, typename DType>
|
||||
inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||
void (*prepare_fun)(void *arg),
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
engine::Allreduce_(sendrecvbuf, sizeof(DType), count, op::Reducer<OP, DType>,
|
||||
engine::mpi::GetType<DType>(), OP::kType, prepare_fun, prepare_arg,
|
||||
is_bootstrap, _file, _line, _caller);
|
||||
_file, _line, _caller);
|
||||
}
|
||||
|
||||
// C++11 support for lambda prepare function
|
||||
@@ -190,13 +185,12 @@ inline void InvokeLambda_(void *fun) {
|
||||
template<typename OP, typename DType>
|
||||
inline void Allreduce(DType *sendrecvbuf, size_t count,
|
||||
std::function<void()> prepare_fun,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
engine::Allreduce_(sendrecvbuf, sizeof(DType), count, op::Reducer<OP, DType>,
|
||||
engine::mpi::GetType<DType>(), OP::kType, InvokeLambda_, &prepare_fun,
|
||||
is_bootstrap, _file, _line, _caller);
|
||||
_file, _line, _caller);
|
||||
}
|
||||
#endif // C++11
|
||||
|
||||
@@ -244,11 +238,12 @@ inline void ReducerSafe_(const void *src_, void *dst_, int len_, const MPI::Data
|
||||
const size_t kUnit = sizeof(DType);
|
||||
const char *psrc = reinterpret_cast<const char*>(src_);
|
||||
char *pdst = reinterpret_cast<char*>(dst_);
|
||||
|
||||
for (int i = 0; i < len_; ++i) {
|
||||
DType tdst, tsrc;
|
||||
// use memcpy to avoid alignment issue
|
||||
std::memcpy(&tdst, pdst + i * kUnit, sizeof(tdst));
|
||||
std::memcpy(&tsrc, psrc + i * kUnit, sizeof(tsrc));
|
||||
std::memcpy(&tdst, pdst + (i * kUnit), sizeof(tdst));
|
||||
std::memcpy(&tsrc, psrc + (i * kUnit), sizeof(tsrc));
|
||||
freduce(tdst, tsrc);
|
||||
std::memcpy(pdst + i * kUnit, &tdst, sizeof(tdst));
|
||||
}
|
||||
@@ -276,12 +271,11 @@ template<typename DType, void (*freduce)(DType &dst, const DType &src)> // NOLIN
|
||||
inline void Reducer<DType, freduce>::Allreduce(DType *sendrecvbuf, size_t count,
|
||||
void (*prepare_fun)(void *arg),
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
handle_.Allreduce(sendrecvbuf, sizeof(DType), count, prepare_fun,
|
||||
prepare_arg, is_bootstrap, _file, _line, _caller);
|
||||
prepare_arg, _file, _line, _caller);
|
||||
}
|
||||
// function to perform reduction for SerializeReducer
|
||||
template<typename DType>
|
||||
@@ -330,7 +324,6 @@ inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
|
||||
size_t max_nbyte, size_t count,
|
||||
void (*prepare_fun)(void *arg),
|
||||
void *prepare_arg,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
@@ -342,7 +335,7 @@ inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
|
||||
// invoke here
|
||||
handle_.Allreduce(BeginPtr(buffer_), max_nbyte, count,
|
||||
SerializeReduceClosure<DType>::Invoke, &c,
|
||||
is_bootstrap, _file, _line, _caller);
|
||||
_file, _line, _caller);
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
utils::MemoryFixSizeBuffer fs(BeginPtr(buffer_) + i * max_nbyte, max_nbyte);
|
||||
sendrecvobj[i].Load(fs);
|
||||
@@ -353,23 +346,21 @@ inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
|
||||
template<typename DType, void (*freduce)(DType &dst, const DType &src)> // NOLINT(*)g
|
||||
inline void Reducer<DType, freduce>::Allreduce(DType *sendrecvbuf, size_t count,
|
||||
std::function<void()> prepare_fun,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
this->Allreduce(sendrecvbuf, count, InvokeLambda_, &prepare_fun,
|
||||
is_bootstrap, _file, _line, _caller);
|
||||
_file, _line, _caller);
|
||||
}
|
||||
template<typename DType>
|
||||
inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
|
||||
size_t max_nbytes, size_t count,
|
||||
std::function<void()> prepare_fun,
|
||||
bool is_bootstrap,
|
||||
const char* _file,
|
||||
const int _line,
|
||||
const char* _caller) {
|
||||
this->Allreduce(sendrecvobj, max_nbytes, count, InvokeLambda_, &prepare_fun,
|
||||
is_bootstrap, _file, _line, _caller);
|
||||
_file, _line, _caller);
|
||||
}
|
||||
#endif // DMLC_USE_CXX11
|
||||
} // namespace rabit
|
||||
|
||||
Reference in New Issue
Block a user