diff --git a/src/sync/sync.h b/src/sync/sync.h
new file mode 100644
index 000000000..1d9be719c
--- /dev/null
+++ b/src/sync/sync.h
@@ -0,0 +1,20 @@
+#ifndef XGBOOST_SYNC_SYNC_H_
+#define XGBOOST_SYNC_SYNC_H_
+/*!
+ * \file sync.h
+ * \brief interface to do synchronization
+ * \author Tianqi Chen
+ */
+namespace xgboost {
+namespace sync {
+/*!
+ * \brief synchronization context interface of xgboost,
+ *  will be provided as a singleton
+ */
+class IContext {
+  // no members are declared yet
+};
+
+}  // namespace sync
+}  // namespace xgboost
+#endif  // XGBOOST_SYNC_SYNC_H_
diff --git a/src/tree/updater_distcol-inl.hpp b/src/tree/updater_distcol-inl.hpp
new file mode 100644
index 000000000..f5d37c1fc
--- /dev/null
+++ b/src/tree/updater_distcol-inl.hpp
@@ -0,0 +1,161 @@
+#ifndef XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
+#define XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
+/*!
+ * \file updater_distcol-inl.hpp
+ * \brief beta distributed version that takes a sub-column
+ *        and constructs a tree
+ * \author Tianqi Chen
+ *
+ * NOTE(review): the template arguments in this file were reconstructed
+ *  after being lost in extraction -- confirm against updater_colmaker-inl.hpp
+ */
+#include <algorithm>
+#include <vector>
+#include "../utils/bitmap.h"
+#include "./updater_colmaker-inl.hpp"
+
+namespace xgboost {
+namespace tree {
+/*!
+ * \brief tree updater derived from ColMaker; its builder records in a bitmap
+ *  the rows that have to leave the default branch of their parent split
+ * \tparam TStats statistics type, forwarded to ColMaker
+ */
+template<typename TStats>
+class DistColMaker : public ColMaker<TStats> {
+ public:
+  DistColMaker(void) : builder(param) {}
+  virtual ~DistColMaker(void) {}
+  /*!
+   * \brief set a training parameter
+   * \param name name of the parameter
+   * \param val value of the parameter
+   */
+  virtual void SetParam(const char *name, const char *val) {
+    param.SetParam(name, val);
+  }
+  /*!
+   * \brief update a single tree; all arguments are forwarded to the builder
+   * \param gpair forwarded unchanged to the builder
+   * \param p_fmat feature matrix
+   * \param info checked with TStats::CheckInfo before the update
+   * \param trees trees to update, must contain exactly one element
+   */
+  virtual void Update(const std::vector<bst_gpair> &gpair,
+                      IFMatrix *p_fmat,
+                      const BoosterInfo &info,
+                      const std::vector<RegTree*> &trees) {
+    TStats::CheckInfo(info);
+    utils::Check(trees.size() == 1, "DistColMaker: only support one tree at a time");
+    builder.Update(gpair, p_fmat, info, trees[0]);
+  }
+
+ private:
+  struct Builder : public ColMaker<TStats>::Builder {
+   public:
+    explicit Builder(const TrainParam &param)
+        : ColMaker<TStats>::Builder(param) {
+    }
+
+   protected:
+    /*!
+     * \brief move the rows that do not follow the default direction of
+     *  their parent split to the other child
+     * \param qexpand node ids; split features of the non-leaf ones are scanned
+     * \param p_fmat feature matrix, supplies the column batches and the row set
+     * \param tree tree that holds the splits
+     */
+    virtual void SetNonDefaultPosition(const std::vector<int> &qexpand,
+                                       IFMatrix *p_fmat, const RegTree &tree) {
+      // step 2, classify the non-default data into right places
+      std::vector<unsigned> fsplits;
+      for (size_t i = 0; i < qexpand.size(); ++i) {
+        const int nid = qexpand[i];
+        if (!tree[nid].is_leaf()) {
+          fsplits.push_back(tree[nid].split_index());
+        }
+      }
+      // get the candidate split index
+      std::sort(fsplits.begin(), fsplits.end());
+      fsplits.erase(std::unique(fsplits.begin(), fsplits.end()), fsplits.end());
+      // fsplits is sorted, so indices not below NumCol() sit at the back
+      while (fsplits.size() != 0 && fsplits.back() >= p_fmat->NumCol()) {
+        fsplits.pop_back();
+      }
+      // setup BitMap
+      bitmap.Resize(this->position.size());
+      bitmap.Clear();
+      utils::IIterator<ColBatch> *iter = p_fmat->ColIterator(fsplits);
+      while (iter->Next()) {
+        const ColBatch &batch = iter->Value();
+        for (size_t i = 0; i < batch.size; ++i) {
+          ColBatch::Inst col = batch[i];
+          const bst_uint fid = batch.col_index[i];
+          const bst_omp_uint ndata = static_cast<bst_omp_uint>(col.length);
+          #pragma omp parallel for schedule(static)
+          for (bst_omp_uint j = 0; j < ndata; ++j) {
+            const bst_uint ridx = col[j].index;
+            const float fvalue = col[j].fvalue;
+            int nid = this->position[ridx];
+            if (nid < 0) continue;
+            // go back to parent, correct those who are not default
+            nid = tree[nid].parent();
+            if (tree[nid].split_index() == fid) {
+              // rows handled by different threads can share a bitmap word;
+              // this is safe because SetTrue updates the word atomically
+              if (fvalue < tree[nid].split_cond()) {
+                if (!tree[nid].default_left()) bitmap.SetTrue(ridx);
+              } else {
+                if (tree[nid].default_left()) bitmap.SetTrue(ridx);
+              }
+            }
+          }
+        }
+      }
+      // communicate bitmap
+      // sync::AllReduce();
+      const std::vector<bst_uint> &rowset = p_fmat->buffered_rowset();
+      // get the new position
+      const bst_omp_uint ndata = static_cast<bst_omp_uint>(rowset.size());
+      #pragma omp parallel for schedule(static)
+      for (bst_omp_uint i = 0; i < ndata; ++i) {
+        const bst_uint ridx = rowset[i];
+        int nid = this->position[ridx];
+        if (nid >= 0 && bitmap.Get(ridx)) {
+          nid = tree[nid].parent();
+          if (tree[nid].default_left()) {
+            this->position[ridx] = tree[nid].cright();
+          } else {
+            this->position[ridx] = tree[nid].cleft();
+          }
+        }
+      }
+    }
+    /*!
+     * \brief for every node in qexpand, fold stemp[tid][nid].best of each
+     *  tid below nthread into snode[nid].best
+     * \param qexpand node ids to process
+     */
+    virtual void SyncBestSolution(const std::vector<int> &qexpand) {
+      for (size_t i = 0; i < qexpand.size(); ++i) {
+        const int nid = qexpand[i];
+        for (int tid = 0; tid < this->nthread; ++tid) {
+          this->snode[nid].best.Update(this->stemp[tid][nid].best);
+        }
+      }
+      // communicate best solution
+      // sync::AllReduce
+    }
+
+   private:
+    // indexed by row index, a set bit means the row leaves the default branch
+    utils::BitMap bitmap;
+  };
+  // training parameter
+  TrainParam param;
+  // builder that performs the update, constructed from param
+  Builder builder;
+};
+}  // namespace tree
+}  // namespace xgboost
+#endif  // XGBOOST_TREE_UPDATER_DISTCOL_INL_HPP_
diff --git a/src/utils/bitmap.h b/src/utils/bitmap.h
new file mode 100644
index 000000000..9c7cf2fc2
--- /dev/null
+++ b/src/utils/bitmap.h
@@ -0,0 +1,58 @@
+#ifndef XGBOOST_UTILS_BITMAP_H_
+#define XGBOOST_UTILS_BITMAP_H_
+/*!
+ * \file bitmap.h
+ * \brief a simple implementation of bitmap
+ * \author Tianqi Chen
+ */
+#include <algorithm>
+#include <vector>
+#include "./utils.h"
+
+namespace xgboost {
+namespace utils {
+/*! \brief bit map that contains set of bit indicators */
+struct BitMap {
+  /*!
+   * \brief internal data structure, each element holds 32 indicator bits
+   *  NOTE(review): element type reconstructed as uint32_t, presumably
+   *  provided through ./utils.h -- confirm
+   */
+  std::vector<uint32_t> data;
+  /*!
+   * \brief resize the bitmap to be certain size; newly added words are
+   *  zeroed, words that already exist keep their bits
+   * \param size the size of bitmap, in bits
+   */
+  inline void Resize(size_t size) {
+    data.resize((size + 31U) >> 5, 0);
+  }
+  /*!
+   * \brief query the i-th position of bitmap, no bounds check is done
+   * \param i the position to query
+   * \return whether the i-th bit is set
+   */
+  inline bool Get(size_t i) const {
+    return ((data[i >> 5] >> (i & 31U)) & 1U) != 0;
+  }
+  /*!
+   * \brief set i-th position to true, no bounds check is done;
+   *  with OpenMP enabled the word is updated atomically, so threads
+   *  may set bits that live in the same word at the same time
+   * \param i position index
+   */
+  inline void SetTrue(size_t i) {
+    // build the mask from an unsigned one: 1 << 31 overflows a signed int
+    const uint32_t mask = static_cast<uint32_t>(1) << (i & 31U);
+    uint32_t *word = &data[i >> 5];
+    #pragma omp atomic
+    *word |= mask;
+  }
+  /*! \brief clear the bitmap, set all places to false */
+  inline void Clear(void) {
+    std::fill(data.begin(), data.end(), 0U);
+  }
+};
+}  // namespace utils
+}  // namespace xgboost
+#endif  // XGBOOST_UTILS_BITMAP_H_