diff --git a/Makefile b/Makefile
index 3f0b3c7cf..5738f2573 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@ endif
 # by default use c++11
 ifeq ($(no_cxx11),1)
 else
-	CFLAGS += -std=c++11
+	CFLAGS +=
 endif
 
 # specify tensor path
@@ -30,7 +30,7 @@ mpi: $(MPIBIN)
 # rules to get rabit library
 librabit:
 	if [ ! -d rabit ]; then git clone https://github.com/tqchen/rabit.git; fi
-	cd rabit;make lib/librabit.a; cd -
+	cd rabit;make lib/librabit.a lib/librabit_mock.a; cd -
 librabit_mpi:
 	if [ ! -d rabit ]; then git clone https://github.com/tqchen/rabit.git; fi
 	cd rabit;make lib/librabit_mpi.a; cd -
diff --git a/multi-node/col-split/mushroom-col-rabit-mock.sh b/multi-node/col-split/mushroom-col-rabit-mock.sh
index 5148a0b61..148e629a2 100755
--- a/multi-node/col-split/mushroom-col-rabit-mock.sh
+++ b/multi-node/col-split/mushroom-col-rabit-mock.sh
@@ -16,7 +16,7 @@ k=$1
 python splitsvm.py ../../demo/data/agaricus.txt.train train $k
 
 # run xgboost mpi
-../../rabit/tracker/rabit_mpi.py $k local ../../rabit/test/keepalive.sh ../../xgboost mushroom-col.conf dsplit=col mock=0,0,1,0 mock=1,1,0,0
+../../rabit/tracker/rabit_mpi.py $k local ../../rabit/test/keepalive.sh ../../xgboost mushroom-col.conf dsplit=col mock=0,1,0,0 mock=1,1,0,0
 
 # the model can be directly loaded by single machine xgboost solver, as usuall
 #../../xgboost mushroom-col.conf task=dump model_in=0002.model fmap=../../demo/data/featmap.txt name_dump=dump.nice.$k.txt
diff --git a/src/learner/learner-inl.hpp b/src/learner/learner-inl.hpp
index c1a4bc370..201100a6f 100644
--- a/src/learner/learner-inl.hpp
+++ b/src/learner/learner-inl.hpp
@@ -34,6 +34,8 @@ class BoostLearner : public rabit::ISerializable {
     prob_buffer_row = 1.0f;
     distributed_mode = 0;
     pred_buffer_size = 0;
+    seed_per_iteration = 0;
+    seed = 0;
   }
   virtual ~BoostLearner(void) {
     if (obj_ != NULL) delete obj_;
@@ -102,7 +104,10 @@ class BoostLearner : public rabit::ISerializable {
       this->SetParam("updater", "grow_colmaker,refresh,prune");
     }
     if (!strcmp(name, "eval_metric")) evaluator_.AddEval(val);
-    if (!strcmp("seed", name)) random::Seed(atoi(val));
+    if (!strcmp("seed", name)) {
+      this->seed = atoi(val); random::Seed(atoi(val));
+    }
+    if (!strcmp("seed_per_iter", name)) seed_per_iteration = atoi(val);
     if (!strcmp(name, "num_class")) this->SetParam("num_output_group", val);
     if (!strcmp(name, "nthread")) {
       omp_set_num_threads(atoi(val));
@@ -222,6 +227,9 @@ class BoostLearner : public rabit::ISerializable {
    * \param p_train pointer to the data matrix
    */
  inline void UpdateOneIter(int iter, const DMatrix &train) {
+    if (seed_per_iteration || rabit::IsDistributed()) {
+      random::Seed(this->seed * kRandSeedMagic);
+    }
     this->PredictRaw(train, &preds_);
     obj_->GetGradient(preds_, train.info, iter, &gpair_);
     gbm_->DoBoost(train.fmat(), this->FindBufferOffset(train), train.info.info, &gpair_);
@@ -369,6 +377,12 @@ class BoostLearner : public rabit::ISerializable {
     }
   };
   // data fields
+  // stored random seed
+  int seed;
+  // whether to seed the PRNG each iteration
+  // this is important for restart from existing iterations
+  // default set to no, but will auto switch on in distributed mode
+  int seed_per_iteration;
   // silent during training
   int silent;
   // distributed learning mode, if any, 0:none, 1:col, 2:row
@@ -397,6 +411,8 @@ class BoostLearner : public rabit::ISerializable {
   std::vector<bst_gpair> gpair_;
 
  protected:
+  // magic number to transform random seed
+  const static int kRandSeedMagic = 127;
   // cache entry object that helps handle feature caching
   struct CacheEntry {
     const DMatrix *mat_;
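The reseeding added to UpdateOneIter is what keeps each worker's random decisions (for example column subsampling) in agreement in distributed mode, and it also lets a restarted job reproduce the same draws. Below is a minimal standalone sketch of that idea, not xgboost code: std::mt19937 stands in for random::Seed/Shuffle, and PickColumns is an illustrative helper name.

#include <algorithm>
#include <cstdio>
#include <random>
#include <vector>

static const int kRandSeedMagic = 127;  // same constant the patch uses

// Reseed from the stored seed, then draw a column subsample; every caller
// that passes the same seed shuffles the feature indices identically.
std::vector<int> PickColumns(int seed, int num_features, float colsample) {
  std::mt19937 rng(static_cast<unsigned>(seed * kRandSeedMagic));
  std::vector<int> findex(num_features);
  for (int i = 0; i < num_features; ++i) findex[i] = i;
  std::shuffle(findex.begin(), findex.end(), rng);
  findex.resize(static_cast<size_t>(colsample * num_features));
  return findex;
}

int main() {
  // Two "workers" that reseed before the draw agree on the selected
  // features without exchanging any messages.
  std::vector<int> a = PickColumns(10, 8, 0.5f);
  std::vector<int> b = PickColumns(10, 8, 0.5f);
  std::printf("workers agree: %s\n", a == b ? "yes" : "no");
  return 0;
}

Since the patch reseeds with the same value every round, the draws repeat across iterations as well; mixing the iteration number into the seed (not done here) would vary the subsample per round while staying deterministic.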
diff --git a/src/tree/updater_basemaker-inl.hpp b/src/tree/updater_basemaker-inl.hpp
index 9b7c38b00..851811eae 100644
--- a/src/tree/updater_basemaker-inl.hpp
+++ b/src/tree/updater_basemaker-inl.hpp
@@ -76,19 +76,15 @@ class BaseMaker: public IUpdater {
     unsigned n = static_cast<unsigned>(p * findex.size());
     random::Shuffle(findex);
     findex.resize(n);
-    if (n != findex.size()) {
-      // sync the findex if it is subsample
-      std::string s_cache;
-      utils::MemoryBufferStream fc(&s_cache);
-      utils::IStream &fs = fc;
-      if (rabit::GetRank() == 0) {
-        fs.Write(findex);
-        rabit::Broadcast(&s_cache, 0);
-      } else {
-        rabit::Broadcast(&s_cache, 0);
-        fs.Read(&findex);
-      }
+    // sync the findex if it is subsample
+    std::string s_cache;
+    utils::MemoryBufferStream fc(&s_cache);
+    utils::IStream &fs = fc;
+    if (rabit::GetRank() == 0) {
+      fs.Write(findex);
     }
+    rabit::Broadcast(&s_cache, 0);
+    fs.Read(&findex);
   }
 
  private:
diff --git a/src/tree/updater_sync-inl.hpp b/src/tree/updater_sync-inl.hpp
index d29743bf3..0cbbb4eed 100644
--- a/src/tree/updater_sync-inl.hpp
+++ b/src/tree/updater_sync-inl.hpp
@@ -40,12 +40,11 @@ class TreeSyncher: public IUpdater {
       for (size_t i = 0; i < trees.size(); ++i) {
         trees[i]->SaveModel(fs);
       }
-      rabit::Broadcast(&s_model, 0);
-    } else {
-      rabit::Broadcast(&s_model, 0);
-      for (size_t i = 0; i < trees.size(); ++i) {
-        trees[i]->LoadModel(fs);
-      }
+    }
+    fs.Seek(0);
+    rabit::Broadcast(&s_model, 0);
+    for (size_t i = 0; i < trees.size(); ++i) {
+      trees[i]->LoadModel(fs);
     }
   }
 };
diff --git a/src/xgboost_main.cpp b/src/xgboost_main.cpp
index 89cc1b77d..a3f838131 100644
--- a/src/xgboost_main.cpp
+++ b/src/xgboost_main.cpp
@@ -284,8 +284,8 @@ class BoostLearnTask {
 }
 
 int main(int argc, char *argv[]){
-  xgboost::random::Seed(0);
   xgboost::BoostLearnTask tsk;
+  tsk.SetParam("seed", "0");
   int ret = tsk.Run(argc, argv);
   rabit::Finalize();
   return ret;
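TreeSyncher and BaseMaker now share one broadcast pattern: the root serializes into a string buffer, every rank takes part in the collective Broadcast exactly once, and every rank, root included, deserializes from the rewound buffer, so there is no rank-dependent path around the collective call. A minimal standalone sketch of that pattern follows; rabit::Broadcast and utils::MemoryBufferStream are replaced by single-process stand-ins, a plain vector stands in for the tree model, and the names (MemBufStream, SyncModel, g_wire) are illustrative only.

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

// Stand-in for utils::MemoryBufferStream: sequential Write/Read over a
// std::string, plus Seek to rewind before re-reading.
struct MemBufStream {
  std::string *buf;
  size_t pos;
  explicit MemBufStream(std::string *b) : buf(b), pos(0) {}
  void Write(const void *p, size_t n) { buf->append(static_cast<const char *>(p), n); }
  void Read(void *p, size_t n) { std::memcpy(p, buf->data() + pos, n); pos += n; }
  void Seek(size_t p) { pos = p; }
};

// Stand-in for rabit::Broadcast(&s, 0): rank 0 publishes its buffer, every
// other rank overwrites its local copy with what rank 0 published.
static std::string g_wire;
void Broadcast(std::string *s, int rank) {
  if (rank == 0) g_wire = *s; else *s = g_wire;
}

// The symmetric pattern: write on the root only, then every rank rewinds,
// broadcasts, and reloads -- no if/else around the collective call.
std::vector<double> SyncModel(int rank, const std::vector<double> &local) {
  std::string s_model;
  MemBufStream fs(&s_model);
  if (rank == 0) {
    size_t n = local.size();
    fs.Write(&n, sizeof(n));
    fs.Write(local.data(), n * sizeof(double));
  }
  fs.Seek(0);
  Broadcast(&s_model, rank);
  size_t n = 0;
  fs.Read(&n, sizeof(n));
  std::vector<double> out(n);
  fs.Read(out.data(), n * sizeof(double));
  return out;  // identical contents on every rank
}

int main() {
  std::vector<double> root_model = {0.5, -1.25, 3.0};
  for (int rank = 0; rank < 3; ++rank) {
    // non-root ranks start empty and end up with rank 0's copy
    std::vector<double> m = SyncModel(rank, rank == 0 ? root_model : std::vector<double>());
    std::printf("rank %d has %zu weights, first = %g\n", rank, m.size(), m[0]);
  }
  return 0;
}

The Seek(0) is what lets the root re-read the buffer it just wrote, which is how the patch drops the separate load path that rank 0 used to skip.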