intial version of sync wrapper

This commit is contained in:
tqchen 2014-10-15 21:39:42 -07:00
parent e295128973
commit f2577fec86
3 changed files with 164 additions and 8 deletions

View File

@ -1,8 +1,8 @@
export CC = gcc
export CXX = g++
export MPICXX = mpicxx
export LDFLAGS= -pthread -lm
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC -pedantic
export CFLAGS = -Wall -O3 -msse2 -Wno-unknown-pragmas -fPIC
ifeq ($(no_omp),1)
CFLAGS += -DDISABLE_OPENMP
@ -13,11 +13,13 @@ endif
# specify tensor path
BIN = xgboost
OBJ = updater.o gbm.o io.o
MPIOBJ = sync.o
MPIBIN = test/test
SLIB = wrapper/libxgboostwrapper.so
.PHONY: clean all python Rpack
all: $(BIN) $(OBJ) $(SLIB)
all: $(BIN) $(OBJ) $(SLIB) $(MPIOBJ) $(MPIBIN)
python: wrapper/libxgboostwrapper.so
# now the wrapper takes in two files. io and wrapper part
@ -25,8 +27,10 @@ wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp $(OBJ)
updater.o: src/tree/updater.cpp src/tree/*.hpp src/*.h src/tree/*.h
gbm.o: src/gbm/gbm.cpp src/gbm/*.hpp src/gbm/*.h
io.o: src/io/io.cpp src/io/*.hpp src/utils/*.h src/learner/dmatrix.h src/*.h
sync.o: src/sync/sync.cpp
xgboost: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h $(OBJ)
wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h $(OBJ)
test/test: test/test.cpp sync.o
$(BIN) :
$(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
@ -37,6 +41,12 @@ $(SLIB) :
$(OBJ) :
$(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
$(MPIOBJ) :
$(MPICXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) )
$(MPIBIN) :
$(MPICXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^)
install:
cp -f -r $(BIN) $(INSTALL_PATH)

61
src/sync/sync.cpp Normal file
View File

@ -0,0 +1,61 @@
#include "./sync.h"
#include "../utils/utils.h"
#include "mpi.h"
namespace xgboost {
namespace sync {
// code for reduce handle
ReduceHandle::ReduceHandle(void) : handle(NULL) {
}
ReduceHandle::~ReduceHandle(void) {
if (handle != NULL) {
MPI::Op *op = reinterpret_cast<MPI::Op*>(handle);
op->Free();
delete op;
}
}
void ReduceHandle::Init(ReduceFunction redfunc, bool commute) {
utils::Assert(handle == NULL, "cannot initialize reduce handle twice");
MPI::Op *op = new MPI::Op();
MPI::User_function *pf = reinterpret_cast<MPI::User_function*>(redfunc);
op->Init(pf, commute);
handle = op;
}
void ReduceHandle::AllReduce(void *sendrecvbuf, size_t n4byte) {
utils::Assert(handle != NULL, "must intialize handle to call AllReduce");
MPI::Op *op = reinterpret_cast<MPI::Op*>(handle);
MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, n4byte, MPI_INT, *op);
}
int GetRank(void) {
return MPI::COMM_WORLD.Get_rank();
}
void Init(int argc, char *argv[]) {
MPI::Init(argc, argv);
}
void Finalize(void) {
MPI::Finalize();
}
void AllReduce_(void *sendrecvbuf, int count, const MPI::Datatype &dtype, ReduceOp op) {
switch(op) {
case kBitwiseOR: MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, dtype, MPI::BOR); return;
case kSum: MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, dtype, MPI::SUM); return;
}
}
template<>
void AllReduce<uint32_t>(uint32_t *sendrecvbuf, int count, ReduceOp op) {
AllReduce_(sendrecvbuf, count, MPI::UNSIGNED, op);
}
template<>
void AllReduce<float>(float *sendrecvbuf, int count, ReduceOp op) {
AllReduce_(sendrecvbuf, count, MPI::FLOAT, op);
}
} // namespace sync
} // namespace xgboost

View File

@ -5,14 +5,99 @@
* \brief interface to do synchronization
* \author Tianqi Chen
*/
#include <cstdio>
#include <cstring>
#include "../utils/utils.h"
namespace xgboost {
/*! \brief syncrhonizer module that minimum wraps MPI */
namespace sync {
/*!
* \brief synchronization context interface of xgboost,
* will be provided as a singleton
*/
class IContext {
/*! \brief reduce operator supported */
enum ReduceOp {
kBitwiseOR,
kSum
};
typedef void (ReduceFunction) (const void *src, void *dst, int len);
/* !\brief handle for customized reducer */
class ReduceHandle {
public:
// constructor
ReduceHandle(void);
// destructor
~ReduceHandle(void);
// initialize the reduce function
void Init(ReduceFunction redfunc, bool commute = true);
/*!
* \brief customized in-place all reduce operation
* \param sendrecvbuf the in place send-recv buffer
* \param n4bytes number of nbytes send through all reduce
*/
void AllReduce(void *sendrecvbuf, size_t n4bytes);
private:
// handle data field
void *handle;
};
/*! \brief get rank of current process */
int GetRank(void);
/*! \brief intiialize the synchronization module */
void Init(int argc, char *argv[]);
/*! \brief finalize syncrhonization module */
void Finalize(void);
/*!
* \brief in-place all reduce operation
* \param sendrecvbuf the in place send-recv buffer
* \param count count of data
* \param op reduction function
*/
template<typename DType>
void AllReduce(DType *sendrecvbuf, int count, ReduceOp op);
/*!
* \brief template class to make customized reduce and all reduce easy
* Do not use reducer directly in the function you call Finalize, because the destructor can happen after Finalize
* \tparam DType data type that to be reduced
* DType must be a struct, with no pointer, and contains a function Reduce(const DType &d);
*/
template<typename DType>
class Reducer {
public:
Reducer(void) {
handle.Init(ReduceInner);
utils::Assert(sizeof(DType) % sizeof(int) == 0, "struct must be multiple of int");
}
/*!
* \brief customized in-place all reduce operation
* \param sendrecvbuf the in place send-recv buffer
* \param bytes number of 4bytes send through all reduce
* \param reducer the reducer function
*/
inline void AllReduce(DType *sendrecvbuf, int count) {
handle.AllReduce(sendrecvbuf, count * kUnit);
}
private:
// unit size
static const size_t kUnit = sizeof(DType) / sizeof(int);
// inner implementation of reducer
inline static void ReduceInner(const void *src_, void *dst_, int len_) {
const int *psrc = reinterpret_cast<const int*>(src_);
int *pdst = reinterpret_cast<int*>(dst_);
DType tdst, tsrc;
utils::Assert(len_ % kUnit == 0, "length not divide by size");
for (size_t i = 0; i < len_; i += kUnit) {
// use memcpy to avoid alignment issue
std::memcpy(&tdst, pdst + i, sizeof(tdst));
std::memcpy(&tsrc, psrc + i, sizeof(tsrc));
tdst.Reduce(tsrc);
std::memcpy(pdst + i, &tdst, sizeof(tdst));
}
}
// function handle
ReduceHandle handle;
};
} // namespace sync