parent
7d52c0b8c2
commit
bd653fad4c
@ -157,7 +157,6 @@ Parameters for Tree Booster
|
||||
- A comma-separated string defining the sequence of tree updaters to run, providing a modular way to construct and modify the trees. This is an advanced parameter that is usually set automatically, depending on some other parameters. However, it can also be set explicitly by the user. The following updaters exist:
|
||||
|
||||
- ``grow_colmaker``: non-distributed column-based construction of trees.
|
||||
- ``distcol``: distributed tree construction with column-based data splitting mode.
|
||||
- ``grow_histmaker``: distributed tree construction with row-based data splitting based on global proposal of histogram counting.
|
||||
- ``grow_local_histmaker``: based on local histogram counting.
|
||||
- ``grow_skmaker``: uses the approximate sketching algorithm.
|
||||
|
||||
@ -1,67 +0,0 @@
|
||||
/*!
|
||||
* Copyright 2014 by Contributors
|
||||
* \file bitmap.h
|
||||
* \brief a simple implementation of a bitmap
|
||||
* NOTE: bitmap is only threadsafe per word access, remember this when using bitmap
|
||||
* \author Tianqi Chen
|
||||
*/
|
||||
#ifndef XGBOOST_COMMON_BITMAP_H_
|
||||
#define XGBOOST_COMMON_BITMAP_H_
|
||||
|
||||
#include <dmlc/omp.h>
|
||||
#include <vector>
|
||||
|
||||
namespace xgboost {
|
||||
namespace common {
|
||||
/*! \brief bit map that contains set of bit indicators */
|
||||
struct BitMap {
|
||||
/*! \brief internal data structure */
|
||||
std::vector<uint32_t> data;
|
||||
/*!
|
||||
* \brief resize the bitmap to be certain size
|
||||
* \param size the size of bitmap
|
||||
*/
|
||||
inline void Resize(size_t size) {
|
||||
data.resize((size + 31U) >> 5, 0);
|
||||
}
|
||||
/*!
|
||||
* \brief query the i-th position of bitmap
|
||||
* \param i the position in
|
||||
*/
|
||||
inline bool Get(size_t i) const {
|
||||
return (data[i >> 5] >> (i & 31U)) & 1U;
|
||||
}
|
||||
/*!
|
||||
* \brief set i-th position to true
|
||||
* \param i position index
|
||||
*/
|
||||
inline void SetTrue(size_t i) {
|
||||
data[i >> 5] |= (1 << (i & 31U));
|
||||
}
|
||||
/*! \brief initialize the value of bit map from vector of bool*/
|
||||
inline void InitFromBool(const std::vector<int>& vec) {
|
||||
this->Resize(vec.size());
|
||||
// parallel over the full cases
|
||||
auto nsize = static_cast<bst_omp_uint>(vec.size() / 32);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
uint32_t res = 0;
|
||||
for (int k = 0; k < 32; ++k) {
|
||||
uint32_t bit = vec[(i << 5) | k];
|
||||
res |= (bit << k);
|
||||
}
|
||||
data[i] = res;
|
||||
}
|
||||
if (nsize != vec.size()) data.back() = 0;
|
||||
for (size_t i = nsize; i < vec.size(); ++i) {
|
||||
if (vec[i]) this->SetTrue(i);
|
||||
}
|
||||
}
|
||||
/*! \brief clear the bitmap, set all places to false */
|
||||
inline void Clear() {
|
||||
std::fill(data.begin(), data.end(), 0U);
|
||||
}
|
||||
};
|
||||
} // namespace common
|
||||
} // namespace xgboost
|
||||
#endif // XGBOOST_COMMON_BITMAP_H_
|
||||
@ -195,8 +195,7 @@ class GBTree : public GradientBooster {
|
||||
void LoadModel(Json const& in) override;
|
||||
|
||||
// Reports whether lazy checkpointing is allowed for this booster.
// As written, the first return is the one that executes: it yields true when the
// model has a single output group or when the updater sequence contains "distcol".
bool AllowLazyCheckPoint() const override {
|
||||
return model_.learner_model_param->num_output_group == 1 ||
|
||||
tparam_.updater_seq.find("distcol") != std::string::npos;
|
||||
// NOTE(review): unreachable as written. This looks like the replacement line of a
// diff hunk shown next to the statement it replaces; confirm which return is intended.
return model_.learner_model_param->num_output_group == 1;
|
||||
}
|
||||
|
||||
void PredictBatch(DMatrix* p_fmat,
|
||||
|
||||
@ -888,8 +888,6 @@ class LearnerImpl : public LearnerIO {
|
||||
CHECK(tparam_.dsplit != DataSplitMode::kAuto)
|
||||
<< "Precondition violated; dsplit cannot be 'auto' in distributed mode";
|
||||
if (tparam_.dsplit == DataSplitMode::kCol) {
|
||||
// 'distcol' updater hidden until it becomes functional again
|
||||
// See discussion at https://github.com/dmlc/xgboost/issues/1832
|
||||
LOG(FATAL) << "Column-wise data split is currently not supported.";
|
||||
}
|
||||
}
|
||||
|
||||
@ -17,7 +17,6 @@
|
||||
#include "param.h"
|
||||
#include "constraints.h"
|
||||
#include "../common/random.h"
|
||||
#include "../common/bitmap.h"
|
||||
#include "split_evaluator.h"
|
||||
|
||||
namespace xgboost {
|
||||
@ -618,171 +617,10 @@ class ColMaker: public TreeUpdater {
|
||||
};
|
||||
};
|
||||
|
||||
// distributed column maker
|
||||
class DistColMaker : public ColMaker {
|
||||
public:
|
||||
void Configure(const Args& args) override {
|
||||
param_.UpdateAllowUnknown(args);
|
||||
pruner_.reset(TreeUpdater::Create("prune", tparam_));
|
||||
pruner_->Configure(args);
|
||||
spliteval_.reset(SplitEvaluator::Create(param_.split_evaluator));
|
||||
spliteval_->Init(¶m_);
|
||||
}
|
||||
|
||||
char const* Name() const override {
|
||||
return "distcol";
|
||||
}
|
||||
|
||||
void Update(HostDeviceVector<GradientPair> *gpair,
|
||||
DMatrix* dmat,
|
||||
const std::vector<RegTree*> &trees) override {
|
||||
CHECK_EQ(trees.size(), 1U) << "DistColMaker: only support one tree at a time";
|
||||
this->LazyGetColumnDensity(dmat);
|
||||
Builder builder(
|
||||
param_,
|
||||
colmaker_param_,
|
||||
std::unique_ptr<SplitEvaluator>(spliteval_->GetHostClone()),
|
||||
interaction_constraints_, column_densities_);
|
||||
// build the tree
|
||||
builder.Update(gpair->ConstHostVector(), dmat, trees[0]);
|
||||
//// prune the tree, note that pruner will sync the tree
|
||||
pruner_->Update(gpair, dmat, trees);
|
||||
// update position after the tree is pruned
|
||||
builder.UpdatePosition(dmat, *trees[0]);
|
||||
}
|
||||
|
||||
private:
|
||||
class Builder : public ColMaker::Builder {
|
||||
public:
|
||||
explicit Builder(const TrainParam ¶m,
|
||||
ColMakerTrainParam const &colmaker_train_param,
|
||||
std::unique_ptr<SplitEvaluator> spliteval,
|
||||
FeatureInteractionConstraintHost _interaction_constraints,
|
||||
const std::vector<float> &column_densities)
|
||||
: ColMaker::Builder(param, colmaker_train_param,
|
||||
std::move(spliteval),
|
||||
std::move(_interaction_constraints),
|
||||
column_densities) {}
|
||||
inline void UpdatePosition(DMatrix* p_fmat, const RegTree &tree) {
|
||||
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
|
||||
int nid = this->DecodePosition(ridx);
|
||||
while (tree[nid].IsDeleted()) {
|
||||
nid = tree[nid].Parent();
|
||||
CHECK_GE(nid, 0);
|
||||
}
|
||||
this->position_[ridx] = nid;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetNonDefaultPosition(const std::vector<int> &qexpand, DMatrix *p_fmat,
|
||||
const RegTree &tree) override {
|
||||
// step 2, classify the non-default data into right places
|
||||
std::vector<unsigned> fsplits;
|
||||
for (int nid : qexpand) {
|
||||
if (!tree[nid].IsLeaf()) {
|
||||
fsplits.push_back(tree[nid].SplitIndex());
|
||||
}
|
||||
}
|
||||
// get the candidate split index
|
||||
std::sort(fsplits.begin(), fsplits.end());
|
||||
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
|
||||
while (fsplits.size() != 0 && fsplits.back() >= p_fmat->Info().num_col_) {
|
||||
fsplits.pop_back();
|
||||
}
|
||||
// bitmap is only word concurrent, set to bool first
|
||||
{
|
||||
auto ndata = static_cast<bst_omp_uint>(this->position_.size());
|
||||
boolmap_.resize(ndata);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
boolmap_[j] = 0;
|
||||
}
|
||||
}
|
||||
for (const auto &batch : p_fmat->GetBatches<SortedCSCPage>()) {
|
||||
for (auto fid : fsplits) {
|
||||
auto col = batch[fid];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
const bst_uint ridx = col[j].index;
|
||||
const bst_float fvalue = col[j].fvalue;
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
if (!tree[nid].IsLeaf() && tree[nid].SplitIndex() == fid) {
|
||||
if (fvalue < tree[nid].SplitCond()) {
|
||||
if (!tree[nid].DefaultLeft()) boolmap_[ridx] = 1;
|
||||
} else {
|
||||
if (tree[nid].DefaultLeft()) boolmap_[ridx] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bitmap_.InitFromBool(boolmap_);
|
||||
// communicate bitmap
|
||||
rabit::Allreduce<rabit::op::BitOR>(dmlc::BeginPtr(bitmap_.data), bitmap_.data.size());
|
||||
// get the new position
|
||||
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
if (bitmap_.Get(ridx)) {
|
||||
CHECK(!tree[nid].IsLeaf()) << "inconsistent reduce information";
|
||||
if (tree[nid].DefaultLeft()) {
|
||||
this->SetEncodePosition(ridx, tree[nid].RightChild());
|
||||
} else {
|
||||
this->SetEncodePosition(ridx, tree[nid].LeftChild());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// synchronize the best solution of each node
|
||||
void SyncBestSolution(const std::vector<int> &qexpand) override {
|
||||
std::vector<SplitEntry> vec;
|
||||
for (int nid : qexpand) {
|
||||
for (int tid = 0; tid < this->nthread_; ++tid) {
|
||||
this->snode_[nid].best.Update(this->stemp_[tid][nid].best);
|
||||
}
|
||||
vec.push_back(this->snode_[nid].best);
|
||||
}
|
||||
// TODO(tqchen) lazy version
|
||||
// communicate best solution
|
||||
reducer_.Allreduce(dmlc::BeginPtr(vec), vec.size());
|
||||
// assign solution back
|
||||
for (size_t i = 0; i < qexpand.size(); ++i) {
|
||||
const int nid = qexpand[i];
|
||||
this->snode_[nid].best = vec[i];
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
common::BitMap bitmap_;
|
||||
std::vector<int> boolmap_;
|
||||
rabit::Reducer<SplitEntry, SplitEntry::Reduce> reducer_;
|
||||
};
|
||||
// we directly introduce pruner here
|
||||
std::unique_ptr<TreeUpdater> pruner_;
|
||||
// training parameter
|
||||
TrainParam param_;
|
||||
// Cloned for each builder instantiation
|
||||
std::unique_ptr<SplitEvaluator> spliteval_;
|
||||
|
||||
FeatureInteractionConstraintHost interaction_constraints_;
|
||||
};
|
||||
|
||||
// Registers ColMaker in the tree updater registry under the name "grow_colmaker".
XGBOOST_REGISTER_TREE_UPDATER(ColMaker, "grow_colmaker")
    .describe("Grow tree with parallelization over columns.")
    .set_body([] { return new ColMaker(); });
|
||||
|
||||
// Registers DistColMaker in the tree updater registry under the name "distcol".
XGBOOST_REGISTER_TREE_UPDATER(DistColMaker, "distcol")
    .describe("Distributed column split version of tree maker.")
    .set_body([] { return new DistColMaker(); });
|
||||
} // namespace tree
|
||||
} // namespace xgboost
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user