remove is_bootstrap parameter (#102)

* apply openmp simd

* clean up __builtin detection, move the Windows build check over from the xgboost project, add OpenMP support for vectorized reduce

* apply openmp only to rabit

* organize rabit signature

* remove is_bootstrap, use load_checkpoint as implicit flag

* Visual Studio doesn't support the latest OpenMP

* organize omp declarations

* replace memory copy with vector cast

* Revert "replace memory copy with vector cast"

This reverts commit 28de4792dcdff40d83d458510d23b7ef0b191d79.

* Revert "organize omp declarations"

This reverts commit 31341233d31ce93ccf34d700262b1f3f6690bbfe.

* remove OpenMP settings, merge into an upcoming PR

* mis

* per feedback, update comments
This commit is contained in:
Chen Qin
2019-09-10 11:45:50 -07:00
committed by Nan Zhu
parent 5797dcb64e
commit 9a7ac85d7e
13 changed files with 56 additions and 137 deletions

View File

@@ -9,39 +9,15 @@
#include <string>
#include "../serializable.h"
// keeps rabit api caller signature
#ifndef RABIT_API_CALLER_SIGNATURE
#define RABIT_API_CALLER_SIGNATURE
#ifdef __has_builtin
#if __has_builtin(__builtin_FILE)
#if (defined(__GNUC__) && !defined(__clang__))
#define _FILE __builtin_FILE()
#else
#define _FILE "N/A"
#endif // __has_builtin(__builtin_FILE)
#if __has_builtin(__builtin_LINE)
#define _LINE __builtin_LINE()
#else
#define _LINE -1
#endif // __has_builtin(__builtin_LINE)
#if __has_builtin(__builtin_FUNCTION)
#define _CALLER __builtin_FUNCTION()
#else
#define _CALLER "N/A"
#endif // __has_builtin(__builtin_FUNCTION)
#else
#define _FILE "N/A"
#define _LINE -1
#define _CALLER "N/A"
#endif // __has_builtin
#endif // RABIT_API_CALLER_SIGNATURE
#endif // (defined(__GNUC__) && !defined(__clang__))
namespace MPI {
/*! \brief MPI data type just to be compatible with MPI reduce function*/
@@ -88,7 +64,6 @@ class IEngine {
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf.
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
* \param prepare_arg argument used to pass into the lazy preprocessing function
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
@@ -99,7 +74,6 @@ class IEngine {
ReduceFunction reducer,
PreprocFunction prepare_fun = NULL,
void *prepare_arg = NULL,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER) = 0;
@@ -108,13 +82,11 @@ class IEngine {
* \param sendrecvbuf_ buffer for both sending and receiving data
* \param size the size of the data to be broadcasted
* \param root the root worker id to broadcast the data
* \param is_bootstrap if this broadcast is needed to bootstrap failed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
*/
virtual void Broadcast(void *sendrecvbuf_, size_t size, int root,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER) = 0;
@@ -254,7 +226,6 @@ enum DataType {
* will be called by the function before performing Allreduce, to initialize the data in sendrecvbuf_.
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
* \param prepare_arg argument used to pass into the lazy preprocessing function.
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
@@ -267,7 +238,6 @@ void Allreduce_(void *sendrecvbuf,
mpi::OpType op,
IEngine::PreprocFunction prepare_fun = NULL,
void *prepare_arg = NULL,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER);
@@ -296,7 +266,6 @@ class ReduceHandle {
* will be called by the function before performing Allreduce in order to initialize the data in sendrecvbuf_.
* If the result of Allreduce can be recovered directly, then prepare_func will NOT be called
* \param prepare_arg argument used to pass into the lazy preprocessing function
* \param is_bootstrap if this allreduce is needed to bootstrap failed node
* \param _file caller file name used to generate unique cache key
* \param _line caller line number used to generate unique cache key
* \param _caller caller function name used to generate unique cache key
@@ -306,7 +275,6 @@ class ReduceHandle {
size_t count,
IEngine::PreprocFunction prepare_fun = NULL,
void *prepare_arg = NULL,
bool is_bootstrap = false,
const char* _file = _FILE,
const int _line = _LINE,
const char* _caller = _CALLER);

View File

@@ -94,10 +94,9 @@ struct BitOR {
};
template<typename OP, typename DType>
inline void Reducer(const void *src_, void *dst_, int len, const MPI::Datatype &dtype) {
const DType *src = (const DType*)src_;
DType *dst = (DType*)dst_; // NOLINT(*)
for (int i = 0; i < len; ++i) {
const DType* src = (const DType*)src_;
DType* dst = (DType*)dst_; // NOLINT(*)
for (int i = 0; i < len; i++) {
OP::Reduce(dst[i], src[i]);
}
}
@@ -129,42 +128,39 @@ inline std::string GetProcessorName(void) {
}
// broadcast data to all other nodes from root
inline void Broadcast(void *sendrecv_data, size_t size, int root,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
engine::GetEngine()->Broadcast(sendrecv_data, size, root,
is_bootstrap, _file, _line, _caller);
_file, _line, _caller);
}
template<typename DType>
inline void Broadcast(std::vector<DType> *sendrecv_data, int root,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
size_t size = sendrecv_data->size();
Broadcast(&size, sizeof(size), root, is_bootstrap, _file, _line, _caller);
Broadcast(&size, sizeof(size), root, _file, _line, _caller);
if (sendrecv_data->size() != size) {
sendrecv_data->resize(size);
}
if (size != 0) {
Broadcast(&(*sendrecv_data)[0], size * sizeof(DType), root,
is_bootstrap, _file, _line, _caller);
_file, _line, _caller);
}
}
inline void Broadcast(std::string *sendrecv_data, int root,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
size_t size = sendrecv_data->length();
Broadcast(&size, sizeof(size), root, is_bootstrap, _file, _line, _caller);
Broadcast(&size, sizeof(size), root, _file, _line, _caller);
if (sendrecv_data->length() != size) {
sendrecv_data->resize(size);
}
if (size != 0) {
Broadcast(&(*sendrecv_data)[0], size * sizeof(char), root,
is_bootstrap, _file, _line, _caller);
_file, _line, _caller);
}
}
@@ -173,13 +169,12 @@ template<typename OP, typename DType>
inline void Allreduce(DType *sendrecvbuf, size_t count,
void (*prepare_fun)(void *arg),
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
engine::Allreduce_(sendrecvbuf, sizeof(DType), count, op::Reducer<OP, DType>,
engine::mpi::GetType<DType>(), OP::kType, prepare_fun, prepare_arg,
is_bootstrap, _file, _line, _caller);
_file, _line, _caller);
}
// C++11 support for lambda prepare function
@@ -190,13 +185,12 @@ inline void InvokeLambda_(void *fun) {
template<typename OP, typename DType>
inline void Allreduce(DType *sendrecvbuf, size_t count,
std::function<void()> prepare_fun,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
engine::Allreduce_(sendrecvbuf, sizeof(DType), count, op::Reducer<OP, DType>,
engine::mpi::GetType<DType>(), OP::kType, InvokeLambda_, &prepare_fun,
is_bootstrap, _file, _line, _caller);
_file, _line, _caller);
}
#endif // C++11
@@ -244,11 +238,12 @@ inline void ReducerSafe_(const void *src_, void *dst_, int len_, const MPI::Data
const size_t kUnit = sizeof(DType);
const char *psrc = reinterpret_cast<const char*>(src_);
char *pdst = reinterpret_cast<char*>(dst_);
for (int i = 0; i < len_; ++i) {
DType tdst, tsrc;
// use memcpy to avoid alignment issue
std::memcpy(&tdst, pdst + i * kUnit, sizeof(tdst));
std::memcpy(&tsrc, psrc + i * kUnit, sizeof(tsrc));
std::memcpy(&tdst, pdst + (i * kUnit), sizeof(tdst));
std::memcpy(&tsrc, psrc + (i * kUnit), sizeof(tsrc));
freduce(tdst, tsrc);
std::memcpy(pdst + i * kUnit, &tdst, sizeof(tdst));
}
@@ -276,12 +271,11 @@ template<typename DType, void (*freduce)(DType &dst, const DType &src)> // NOLIN
inline void Reducer<DType, freduce>::Allreduce(DType *sendrecvbuf, size_t count,
void (*prepare_fun)(void *arg),
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
handle_.Allreduce(sendrecvbuf, sizeof(DType), count, prepare_fun,
prepare_arg, is_bootstrap, _file, _line, _caller);
prepare_arg, _file, _line, _caller);
}
// function to perform reduction for SerializeReducer
template<typename DType>
@@ -330,7 +324,6 @@ inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
size_t max_nbyte, size_t count,
void (*prepare_fun)(void *arg),
void *prepare_arg,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
@@ -342,7 +335,7 @@ inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
// invoke here
handle_.Allreduce(BeginPtr(buffer_), max_nbyte, count,
SerializeReduceClosure<DType>::Invoke, &c,
is_bootstrap, _file, _line, _caller);
_file, _line, _caller);
for (size_t i = 0; i < count; ++i) {
utils::MemoryFixSizeBuffer fs(BeginPtr(buffer_) + i * max_nbyte, max_nbyte);
sendrecvobj[i].Load(fs);
@@ -353,23 +346,21 @@ inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
template<typename DType, void (*freduce)(DType &dst, const DType &src)> // NOLINT(*)g
inline void Reducer<DType, freduce>::Allreduce(DType *sendrecvbuf, size_t count,
std::function<void()> prepare_fun,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
this->Allreduce(sendrecvbuf, count, InvokeLambda_, &prepare_fun,
is_bootstrap, _file, _line, _caller);
_file, _line, _caller);
}
template<typename DType>
inline void SerializeReducer<DType>::Allreduce(DType *sendrecvobj,
size_t max_nbytes, size_t count,
std::function<void()> prepare_fun,
bool is_bootstrap,
const char* _file,
const int _line,
const char* _caller) {
this->Allreduce(sendrecvobj, max_nbytes, count, InvokeLambda_, &prepare_fun,
is_bootstrap, _file, _line, _caller);
_file, _line, _caller);
}
#endif // DMLC_USE_CXX11
} // namespace rabit