finish mushroom example

This commit is contained in:
tqchen
2014-10-16 18:06:47 -07:00
parent 0cf2dd39ea
commit f512f08437
13 changed files with 167 additions and 10 deletions

View File

@@ -10,6 +10,7 @@
#include <utility>
#include <string>
#include <limits>
#include "../sync/sync.h"
#include "./objective.h"
#include "./evaluation.h"
#include "../gbm/gbm.h"
@@ -61,6 +62,7 @@ class BoostLearner {
buffer_size += mats[i]->info.num_row();
num_feature = std::max(num_feature, static_cast<unsigned>(mats[i]->info.num_col()));
}
sync::AllReduce(&num_feature, 1, sync::kMax);
char str_temp[25];
if (num_feature > mparam.num_feature) {
utils::SPrintf(str_temp, sizeof(str_temp), "%u", num_feature);

View File

@@ -15,11 +15,16 @@ namespace sync {
/*! \brief reduction operators that can be passed to AllReduce */
enum ReduceOp {
  /*! \brief sum */
  kSum = 0,
  /*! \brief maximum */
  kMax = 1,
  /*! \brief bitwise OR */
  kBitwiseOR = 2
};
/*! \brief get rank of current process */
int GetRank(void);
/*!
* \brief this is used to check if sync module is a true distributed implementation, or simply a dummy
*/
bool IsDistributed(void);
/*! \brief initialize the synchronization module */
void Init(int argc, char *argv[]);
/*! \brief finalize synchronization module */

View File

@@ -6,18 +6,28 @@ namespace sync {
int GetRank(void) {
  // this implementation always reports rank 0
  const int rank = 0;
  return rank;
}
/*! \brief initialize the synchronization module; this implementation ignores its arguments and does nothing */
void Init(int argc, char *argv[]) {
}
/*! \brief finalize the synchronization module; a no-op in this implementation */
void Finalize(void) {
}
/*! \brief whether this sync module is a true distributed implementation; this one always answers no */
bool IsDistributed(void) {
  const bool distributed = false;
  return distributed;
}
/*!
 * \brief AllReduce specialization for uint32_t buffers
 *  the body is empty, so sendrecvbuf is left exactly as the caller passed it
 *  and count / op are not inspected
 */
template<>
void AllReduce<uint32_t>(uint32_t *sendrecvbuf, int count, ReduceOp op) {
}
/*!
 * \brief AllReduce specialization for float buffers
 *  the body is empty, so sendrecvbuf is left exactly as the caller passed it
 *  and count / op are not inspected
 */
template<>
void AllReduce<float>(float *sendrecvbuf, int count, ReduceOp op) {
}
/*! \brief broadcast stub: the body is empty, so sendrecv_data is left untouched and root is ignored */
void Bcast(std::string *sendrecv_data, int root) {
}
// ReduceHandle stubs: the constructor only sets handle to NULL,
// the destructor and Init have empty bodies (redfunc and commute are ignored)
ReduceHandle::ReduceHandle(void) : handle(NULL) {}
ReduceHandle::~ReduceHandle(void) {}
void ReduceHandle::Init(ReduceFunction redfunc, bool commute) {}

View File

@@ -12,6 +12,10 @@ void Init(int argc, char *argv[]) {
MPI::Init(argc, argv);
}
/*! \brief whether this sync module is a true distributed implementation; the MPI-backed one always answers yes */
bool IsDistributed(void) {
  const bool distributed = true;
  return distributed;
}
/*! \brief finalize the synchronization module by calling MPI::Finalize */
void Finalize(void) {
MPI::Finalize();
}
@@ -20,6 +24,7 @@ void AllReduce_(void *sendrecvbuf, int count, const MPI::Datatype &dtype, Reduce
switch(op) {
case kBitwiseOR: MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, dtype, MPI::BOR); return;
case kSum: MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, dtype, MPI::SUM); return;
case kMax: MPI::COMM_WORLD.Allreduce(MPI_IN_PLACE, sendrecvbuf, count, dtype, MPI::MAX); return;
}
}

View File

@@ -93,9 +93,15 @@ class DistColMaker : public ColMaker<TStats> {
while (fsplits.size() != 0 && fsplits.back() >= p_fmat->NumCol()) {
fsplits.pop_back();
}
// setup BitMap
bitmap.Resize(this->position.size());
bitmap.Clear();
// BitMap is only safe for concurrent writes at word granularity, so record the flags in boolmap first
{
bst_omp_uint ndata = static_cast<bst_omp_uint>(this->position.size());
boolmap.resize(ndata);
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
boolmap[j] = 0;
}
}
utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fsplits);
while (iter->Next()) {
const ColBatch &batch = iter->Value();
@@ -110,15 +116,16 @@ class DistColMaker : public ColMaker<TStats> {
const int nid = this->DecodePosition(ridx);
if (!tree[nid].is_leaf() && tree[nid].split_index() == fid) {
if (fvalue < tree[nid].split_cond()) {
if (!tree[nid].default_left()) bitmap.SetTrue(ridx);
if (!tree[nid].default_left()) boolmap[ridx] = 1;
} else {
if (tree[nid].default_left()) bitmap.SetTrue(ridx);
if (tree[nid].default_left()) boolmap[ridx] = 1;
}
}
}
}
}
bitmap.InitFromBool(boolmap);
// communicate bitmap
sync::AllReduce(BeginPtr(bitmap.data), bitmap.data.size(), sync::kBitwiseOR);
const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
@@ -159,6 +166,7 @@ class DistColMaker : public ColMaker<TStats> {
private:
utils::BitMap bitmap;
std::vector<int> boolmap;
sync::Reducer<SplitEntry> reducer;
};
// we directly introduce pruner here

View File

@@ -7,6 +7,7 @@
*/
#include <vector>
#include "./utils.h"
#include "./omp.h"
namespace xgboost {
namespace utils {
@@ -35,6 +36,25 @@ struct BitMap {
inline void SetTrue(size_t i) {
  // bit (i & 31) of word (i >> 5) is switched on; the mask is built from an
  // unsigned one so that selecting bit 31 never shifts into the sign bit of an int
  data[i >> 5] |= (1U << (i & 31U));
}
/*! \brief initialize the value of bit map from vector of bool*/
inline void InitFromBool(const std::vector<int> &vec) {
this->Resize(vec.size());
// parallel over the full cases
bst_omp_uint nsize = static_cast<bst_omp_uint>(vec.size() / 32);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nsize; ++i) {
uint32_t res = 0;
for (int k = 0; k < 32; ++k) {
int bit = vec[(i << 5) | k];
res |= (bit << k);
}
data[i] = res;
}
if (nsize != vec.size()) data.back() = 0;
for (size_t i = nsize; i < vec.size(); ++i) {
if (vec[i]) this->SetTrue(i);
}
}
/*! \brief clear the bitmap, set all places to false */
inline void Clear(void) {
std::fill(data.begin(), data.end(), 0U);

View File

@@ -14,7 +14,7 @@ namespace xgboost {
/*!
* \brief wrapping the training process
*/
class BoostLearnTask{
class BoostLearnTask {
public:
inline int Run(int argc, char *argv[]) {
if (argc < 2) {
@@ -31,6 +31,9 @@ class BoostLearnTask{
this->SetParam(name, val);
}
}
if (sync::IsDistributed()) {
this->SetParam("updater", "distcol");
}
if (sync::GetRank() != 0) {
this->SetParam("silent", "2");
}
@@ -93,6 +96,7 @@ class BoostLearnTask{
name_pred = "pred.txt";
name_dump = "dump.txt";
model_dir_path = "./";
load_part = 0;
data = NULL;
}
~BoostLearnTask(void){
@@ -103,13 +107,20 @@ class BoostLearnTask{
}
private:
inline void InitData(void) {
if (strchr(train_path.c_str(), '%') != NULL) {
char s_tmp[256];
utils::SPrintf(s_tmp, sizeof(s_tmp), train_path.c_str(), sync::GetRank());
train_path = s_tmp;
load_part = 1;
}
if (name_fmap != "NULL") fmap.LoadText(name_fmap.c_str());
if (task == "dump") return;
if (task == "pred") {
data = io::LoadDataMatrix(test_path.c_str(), silent != 0, use_buffer != 0);
} else {
// training
data = io::LoadDataMatrix(train_path.c_str(), silent != 0, use_buffer != 0);
data = io::LoadDataMatrix(train_path.c_str(), silent != 0 && load_part == 0, use_buffer != 0);
utils::Assert(eval_data_names.size() == eval_data_paths.size(), "BUG");
for (size_t i = 0; i < eval_data_names.size(); ++i) {
deval.push_back(io::LoadDataMatrix(eval_data_paths[i].c_str(), silent != 0, use_buffer != 0));
@@ -182,6 +193,7 @@ class BoostLearnTask{
fclose(fo);
}
inline void SaveModel(const char *fname) const {
if (sync::GetRank() != 0) return;
utils::FileStream fo(utils::FopenCheck(fname, "wb"));
learner.SaveModel(fo);
fo.Close();
@@ -205,6 +217,8 @@ class BoostLearnTask{
private:
/*! \brief whether silent */
int silent;
/*! \brief special load */
int load_part;
/*! \brief whether use auto binary buffer */
int use_buffer;
/*! \brief whether evaluate training statistics */