From a21df0770dc53a11147300e443ba159f8184add5 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 16 Oct 2014 13:03:42 -0700 Subject: [PATCH] make clear seperation --- Makefile | 19 ++++++++++--------- demo/binary_classification/runexp.sh | 8 ++++---- src/sync/sync_empty.cpp | 27 +++++++++++++++++++++++++++ src/sync/{sync.cpp => sync_mpi.cpp} | 3 +-- src/tree/updater_distcol-inl.hpp | 15 +++++++++------ src/xgboost_main.cpp | 15 ++++++++++++--- 6 files changed, 63 insertions(+), 24 deletions(-) create mode 100644 src/sync/sync_empty.cpp rename src/sync/{sync.cpp => sync_mpi.cpp} (98%) diff --git a/Makefile b/Makefile index bdc4fb583..a6e7f3daa 100644 --- a/Makefile +++ b/Makefile @@ -11,11 +11,11 @@ else endif # specify tensor path -BIN = -OBJ = updater.o gbm.o io.o main.o -MPIOBJ = sync.o -MPIBIN = xgboost -SLIB = #wrapper/libxgboostwrapper.so +BIN = xgboost +OBJ = updater.o gbm.o io.o main.o sync_empty.o +MPIOBJ = sync_mpi.o +MPIBIN = xgboost-mpi +SLIB = wrapper/libxgboostwrapper.so .PHONY: clean all python Rpack @@ -27,11 +27,12 @@ wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp $(OBJ) updater.o: src/tree/updater.cpp src/tree/*.hpp src/*.h src/tree/*.h gbm.o: src/gbm/gbm.cpp src/gbm/*.hpp src/gbm/*.h io.o: src/io/io.cpp src/io/*.hpp src/utils/*.h src/learner/dmatrix.h src/*.h -sync.o: src/sync/sync.cpp +sync_mpi.o: src/sync/sync_mpi.cpp +sync_empty.o: src/sync/sync_empty.cpp main.o: src/xgboost_main.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h -xgboost: $(OBJ) $(MPIOBJ) -#wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h $(OBJ) -test/test: test/test.cpp sync.o +xgboost: updater.o gbm.o io.o main.o sync_empty.o +xgboost-mpi: updater.o gbm.o io.o main.o sync_mpi.o +wrapper/libxgboostwrapper.so: wrapper/xgboost_wrapper.cpp src/utils/*.h src/*.h src/learner/*.hpp src/learner/*.h $(OBJ) $(BIN) : $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) diff --git a/demo/binary_classification/runexp.sh b/demo/binary_classification/runexp.sh index 68c3e6fb9..c1f191e61 100755 --- a/demo/binary_classification/runexp.sh +++ b/demo/binary_classification/runexp.sh @@ -4,12 +4,12 @@ python mapfeat.py # split train and test python mknfold.py agaricus.txt 1 # training and output the models -../../xgboost mushroom.conf +mpirun ../../xgboost mushroom.conf # output prediction task=pred -../../xgboost mushroom.conf task=pred model_in=0002.model +mpirun ../../xgboost mushroom.conf task=pred model_in=0002.model # print the boosters of 00002.model in dump.raw.txt -../../xgboost mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt +mpirun ../../xgboost mushroom.conf task=dump model_in=0002.model name_dump=dump.raw.txt # use the feature map in printing for better visualization -../../xgboost mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt +mpirun ../../xgboost mushroom.conf task=dump model_in=0002.model fmap=featmap.txt name_dump=dump.nice.txt cat dump.nice.txt diff --git a/src/sync/sync_empty.cpp b/src/sync/sync_empty.cpp new file mode 100644 index 000000000..e46a6906a --- /dev/null +++ b/src/sync/sync_empty.cpp @@ -0,0 +1,27 @@ +#include "./sync.h" +#include "../utils/utils.h" +// no synchronization module, single thread mode does not need it anyway +namespace xgboost { +namespace sync { +int GetRank(void) { + return 0; +} +void Init(int argc, char *argv[]) { +} +void Finalize(void) { +} +template<> +void AllReduce(uint32_t *sendrecvbuf, int count, ReduceOp op) { +} +template<> +void AllReduce(float *sendrecvbuf, int count, ReduceOp op) { +} +void Bcast(std::string *sendrecv_data, int root) { +} +ReduceHandle::ReduceHandle(void) : handle(NULL) {} +ReduceHandle::~ReduceHandle(void) {} +void ReduceHandle::Init(ReduceFunction redfunc, bool commute) {} +void ReduceHandle::AllReduce(void *sendrecvbuf, size_t n4byte) {} +} // namespace sync +} // namespace xgboost + diff --git a/src/sync/sync.cpp b/src/sync/sync_mpi.cpp similarity index 98% rename from src/sync/sync.cpp rename to src/sync/sync_mpi.cpp index ced5e2cb1..2890ab609 100644 --- a/src/sync/sync.cpp +++ b/src/sync/sync_mpi.cpp @@ -1,10 +1,9 @@ #include "./sync.h" #include "../utils/utils.h" #include "mpi.h" - +// use MPI to implement sync namespace xgboost { namespace sync { - int GetRank(void) { return MPI::COMM_WORLD.Get_rank(); } diff --git a/src/tree/updater_distcol-inl.hpp b/src/tree/updater_distcol-inl.hpp index e5d1450a5..86fb558b2 100644 --- a/src/tree/updater_distcol-inl.hpp +++ b/src/tree/updater_distcol-inl.hpp @@ -32,13 +32,13 @@ class DistColMaker : public ColMaker { utils::Check(trees.size() == 1, "DistColMaker: only support one tree at a time"); // build the tree builder.Update(gpair, p_fmat, info, trees[0]); - // prune the tree + //// prune the tree pruner.Update(gpair, p_fmat, info, trees); this->SyncTrees(trees[0]); // update position after the tree is pruned builder.UpdatePosition(p_fmat, *trees[0]); } - + private: inline void SyncTrees(RegTree *tree) { std::string s_model; @@ -63,10 +63,12 @@ class DistColMaker : public ColMaker { #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < ndata; ++i) { const bst_uint ridx = rowset[i]; - int nid = this->position[ridx]; - if (nid < 0) { - + int nid = this->DecodePosition(ridx); + while (tree[nid].is_deleted()) { + nid = tree[nid].parent(); + utils::Assert(nid >=0, "distributed learning error"); } + this->position[ridx] = nid; } } protected: @@ -111,6 +113,7 @@ class DistColMaker : public ColMaker { } } } + // communicate bitmap sync::AllReduce(BeginPtr(bitmap.data), bitmap.data.size(), sync::kBitwiseOR); const std::vector &rowset = p_fmat->buffered_rowset(); @@ -125,7 +128,7 @@ class DistColMaker : public ColMaker { if (tree[nid].default_left()) { this->SetEncodePosition(ridx, tree[nid].cright()); } else { - this->SetEncodePosition(ridx, tree[nid].cright()); + this->SetEncodePosition(ridx, tree[nid].cleft()); } } } diff --git a/src/xgboost_main.cpp b/src/xgboost_main.cpp index 75544dd0e..e96342f69 100644 --- a/src/xgboost_main.cpp +++ b/src/xgboost_main.cpp @@ -5,6 +5,7 @@ #include #include #include "io/io.h" +#include "sync/sync.h" #include "utils/utils.h" #include "utils/config.h" #include "learner/learner-inl.hpp" @@ -19,7 +20,7 @@ class BoostLearnTask{ if (argc < 2) { printf("Usage: \n"); return 0; - } + } utils::ConfigIterator itr(argv[1]); while (itr.Next()) { this->SetParam(itr.name(), itr.val()); @@ -30,6 +31,9 @@ class BoostLearnTask{ this->SetParam(name, val); } } + if (sync::GetRank() != 0) { + this->SetParam("silent", "2"); + } this->InitData(); this->InitLearner(); if (task == "dump") { @@ -145,7 +149,9 @@ class BoostLearnTask{ if (!silent) printf("boosting round %d, %lu sec elapsed\n", i, elapsed); learner.UpdateOneIter(i, *data); std::string res = learner.EvalOneIter(i, devalall, eval_data_names); - fprintf(stderr, "%s\n", res.c_str()); + if (silent < 1) { + fprintf(stderr, "%s\n", res.c_str()); + } if (save_period != 0 && (i + 1) % save_period == 0) { this->SaveModel(i); } @@ -243,7 +249,10 @@ class BoostLearnTask{ } int main(int argc, char *argv[]){ + xgboost::sync::Init(argc, argv); xgboost::random::Seed(0); xgboost::BoostLearnTask tsk; - return tsk.Run(argc, argv); + int ret = tsk.Run(argc, argv); + xgboost::sync::Finalize(); + return ret; }