DMatrix refactor stage 2 (#3395)

* DMatrix refactor 2

* Remove buffered rowset usage where possible

* Transition to C++11-style iterators for row access

* Transition column iterators to C++11
This commit is contained in:
Rory Mitchell
2018-10-01 01:29:03 +13:00
committed by GitHub
parent b50bc2c1d4
commit 70d208d68c
36 changed files with 459 additions and 846 deletions

View File

@@ -43,15 +43,14 @@ class BaseMaker: public TreeUpdater {
std::fill(fminmax_.begin(), fminmax_.end(),
-std::numeric_limits<bst_float>::max());
// start accumulating statistics
auto iter = p_fmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
for (bst_uint fid = 0; fid < batch.Size(); ++fid) {
auto c = batch[fid];
auto c = batch[fid];
if (c.size() != 0) {
fminmax_[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]);
fminmax_[fid * 2 + 1] = std::max(c[c.size() - 1].fvalue, fminmax_[fid * 2 + 1]);
fminmax_[fid * 2 + 0] =
std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]);
fminmax_[fid * 2 + 1] =
std::max(c[c.size() - 1].fvalue, fminmax_[fid * 2 + 1]);
}
}
}
@@ -208,16 +207,13 @@ class BaseMaker: public TreeUpdater {
*/
inline void SetDefaultPostion(DMatrix *p_fmat,
const RegTree &tree) {
// set rest of instances to default position
const RowSet &rowset = p_fmat->BufferedRowset();
// set default direct nodes to default
// for leaf nodes that are not fresh, mark then to ~nid,
// so that they are ignored in future statistics collection
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
const int nid = this->DecodePosition(ridx);
if (tree[nid].IsLeaf()) {
// mark finish when it is not a fresh leaf
@@ -303,9 +299,7 @@ class BaseMaker: public TreeUpdater {
const RegTree &tree) {
std::vector<unsigned> fsplits;
this->GetSplitSet(nodes, tree, &fsplits);
auto iter = p_fmat->ColIterator();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
for (auto fid : fsplits) {
auto col = batch[fid];
const auto ndata = static_cast<bst_omp_uint>(col.size());
@@ -345,12 +339,10 @@ class BaseMaker: public TreeUpdater {
thread_temp[tid][nid].Clear();
}
}
const RowSet &rowset = fmat.BufferedRowset();
// setup position
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
const auto ndata = static_cast<bst_omp_uint>(fmat.Info().num_row_);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
const int nid = position_[ridx];
const int tid = omp_get_thread_num();
if (nid >= 0) {

View File

@@ -141,32 +141,27 @@ class ColMaker: public TreeUpdater {
CHECK_EQ(tree.param.num_nodes, tree.param.num_roots)
<< "ColMaker: can only grow new tree";
const std::vector<unsigned>& root_index = fmat.Info().root_index_;
const RowSet& rowset = fmat.BufferedRowset();
{
// setup position
position_.resize(gpair.size());
CHECK_EQ(fmat.Info().num_row_, position_.size());
if (root_index.size() == 0) {
for (size_t i = 0; i < rowset.Size(); ++i) {
position_[rowset[i]] = 0;
}
std::fill(position_.begin(), position_.end(), 0);
} else {
for (size_t i = 0; i < rowset.Size(); ++i) {
const bst_uint ridx = rowset[i];
for (size_t ridx = 0; ridx < position_.size(); ++ridx) {
position_[ridx] = root_index[ridx];
CHECK_LT(root_index[ridx], (unsigned)tree.param.num_roots);
}
}
// mark delete for the deleted datas
for (size_t i = 0; i < rowset.Size(); ++i) {
const bst_uint ridx = rowset[i];
for (size_t ridx = 0; ridx < position_.size(); ++ridx) {
if (gpair[ridx].GetHess() < 0.0f) position_[ridx] = ~position_[ridx];
}
// mark subsample
if (param_.subsample < 1.0f) {
std::bernoulli_distribution coin_flip(param_.subsample);
auto& rnd = common::GlobalRandom();
for (size_t i = 0; i < rowset.Size(); ++i) {
const bst_uint ridx = rowset[i];
for (size_t ridx = 0; ridx < position_.size(); ++ridx) {
if (gpair[ridx].GetHess() < 0.0f) continue;
if (!coin_flip(rnd)) position_[ridx] = ~position_[ridx];
}
@@ -209,13 +204,11 @@ class ColMaker: public TreeUpdater {
}
snode_.resize(tree.param.num_nodes, NodeEntry(param_));
}
const RowSet &rowset = fmat.BufferedRowset();
const MetaInfo& info = fmat.Info();
// setup position
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
const auto ndata = static_cast<bst_omp_uint>(info.num_row_);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
const int tid = omp_get_thread_num();
if (position_[ridx] < 0) continue;
stemp_[tid][position_[ridx]].stats.Add(gpair, info, ridx);
@@ -254,13 +247,13 @@ class ColMaker: public TreeUpdater {
// this function does not support nested functions
inline void ParallelFindSplit(const SparsePage::Inst &col,
bst_uint fid,
const DMatrix &fmat,
DMatrix *p_fmat,
const std::vector<GradientPair> &gpair) {
// TODO(tqchen): double check stats order.
const MetaInfo& info = fmat.Info();
const MetaInfo& info = p_fmat->Info();
const bool ind = col.size() != 0 && col[0].fvalue == col[col.size() - 1].fvalue;
bool need_forward = param_.NeedForwardSearch(fmat.GetColDensity(fid), ind);
bool need_backward = param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind);
bool need_forward = param_.NeedForwardSearch(p_fmat->GetColDensity(fid), ind);
bool need_backward = param_.NeedBackwardSearch(p_fmat->GetColDensity(fid), ind);
const std::vector<int> &qexpand = qexpand_;
#pragma omp parallel
{
@@ -592,8 +585,8 @@ class ColMaker: public TreeUpdater {
virtual void UpdateSolution(const SparsePage &batch,
const std::vector<int> &feat_set,
const std::vector<GradientPair> &gpair,
const DMatrix &fmat) {
const MetaInfo& info = fmat.Info();
DMatrix*p_fmat) {
const MetaInfo& info = p_fmat->Info();
// start enumeration
const auto num_features = static_cast<bst_omp_uint>(feat_set.size());
#if defined(_OPENMP)
@@ -610,11 +603,11 @@ class ColMaker: public TreeUpdater {
const int tid = omp_get_thread_num();
auto c = batch[fid];
const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue;
if (param_.NeedForwardSearch(fmat.GetColDensity(fid), ind)) {
if (param_.NeedForwardSearch(p_fmat->GetColDensity(fid), ind)) {
this->EnumerateSplit(c.data(), c.data() + c.size(), +1,
fid, gpair, info, stemp_[tid]);
}
if (param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind)) {
if (param_.NeedBackwardSearch(p_fmat->GetColDensity(fid), ind)) {
this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1,
fid, gpair, info, stemp_[tid]);
}
@@ -622,7 +615,7 @@ class ColMaker: public TreeUpdater {
} else {
for (bst_omp_uint fid = 0; fid < num_features; ++fid) {
this->ParallelFindSplit(batch[fid], fid,
fmat, gpair);
p_fmat, gpair);
}
}
}
@@ -633,9 +626,8 @@ class ColMaker: public TreeUpdater {
DMatrix *p_fmat,
RegTree *p_tree) {
const std::vector<int> &feat_set = column_sampler_.GetFeatureSet(depth).HostVector();
auto iter = p_fmat->ColIterator();
while (iter->Next()) {
this->UpdateSolution(iter->Value(), feat_set, gpair, *p_fmat);
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
this->UpdateSolution(batch, feat_set, gpair, p_fmat);
}
// after this each thread's stemp will get the best candidates, aggregate results
this->SyncBestSolution(qexpand);
@@ -661,15 +653,13 @@ class ColMaker: public TreeUpdater {
// set the positions in the nondefault
this->SetNonDefaultPosition(qexpand, p_fmat, tree);
// set rest of instances to default position
const RowSet &rowset = p_fmat->BufferedRowset();
// set default direct nodes to default
// for leaf nodes that are not fresh, mark then to ~nid,
// so that they are ignored in future statistics collection
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
CHECK_LT(ridx, position_.size())
<< "ridx exceed bound " << "ridx="<< ridx << " pos=" << position_.size();
const int nid = this->DecodePosition(ridx);
@@ -710,9 +700,7 @@ class ColMaker: public TreeUpdater {
}
std::sort(fsplits.begin(), fsplits.end());
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
auto iter = p_fmat->ColIterator();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
for (auto fid : fsplits) {
auto col = batch[fid];
const auto ndata = static_cast<bst_omp_uint>(col.size());
@@ -798,11 +786,9 @@ class DistColMaker : public ColMaker {
std::unique_ptr<SplitEvaluator> spliteval)
: ColMaker::Builder(param, std::move(spliteval)) {}
inline void UpdatePosition(DMatrix* p_fmat, const RegTree &tree) {
const RowSet &rowset = p_fmat->BufferedRowset();
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
int nid = this->DecodePosition(ridx);
while (tree[nid].IsDeleted()) {
nid = tree[nid].Parent();
@@ -840,9 +826,7 @@ class DistColMaker : public ColMaker {
boolmap_[j] = 0;
}
}
auto iter = p_fmat->ColIterator();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
for (auto fid : fsplits) {
auto col = batch[fid];
const auto ndata = static_cast<bst_omp_uint>(col.size());
@@ -865,12 +849,10 @@ class DistColMaker : public ColMaker {
bitmap_.InitFromBool(boolmap_);
// communicate bitmap
rabit::Allreduce<rabit::op::BitOR>(dmlc::BeginPtr(bitmap_.data), bitmap_.data.size());
const RowSet &rowset = p_fmat->BufferedRowset();
// get the new position
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < ndata; ++i) {
const bst_uint ridx = rowset[i];
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
const int nid = this->DecodePosition(ridx);
if (bitmap_.Get(ridx)) {
CHECK(!tree[nid].IsLeaf()) << "inconsistent reduce information";

View File

@@ -661,19 +661,12 @@ class GPUMaker : public TreeUpdater {
fId->reserve(nCols * nRows);
// in case you end up with a DMatrix having no column access
// then make sure to enable that before copying the data!
if (!dmat->HaveColAccess(true)) {
dmat->InitColAccess(nRows, true);
}
auto iter = dmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto& batch : dmat->GetSortedColumnBatches()) {
for (int i = 0; i < batch.Size(); i++) {
auto col = batch[i];
for (const Entry* it = col.data(); it != col.data() + col.size();
it++) {
int inst_id = static_cast<int>(it->index);
fval->push_back(it->fvalue);
for (const Entry& e : col) {
int inst_id = static_cast<int>(e.index);
fval->push_back(e.fvalue);
fId->push_back(inst_id);
}
offset->push_back(fval->size());

View File

@@ -19,6 +19,7 @@
#include "../common/hist_util.h"
#include "../common/host_device_vector.h"
#include "../common/timer.h"
#include "../common/common.h"
#include "param.h"
#include "updater_gpu_common.cuh"
@@ -803,10 +804,8 @@ class GPUHistMaker : public TreeUpdater {
reducer_.Init(device_list_);
dmlc::DataIter<SparsePage>* iter = dmat->RowIterator();
iter->BeforeFirst();
CHECK(iter->Next()) << "Empty batches are not supported";
const SparsePage& batch = iter->Value();
auto batch_iter = dmat->GetRowBatches().begin();
const SparsePage& batch = *batch_iter;
// Create device shards
shards_.resize(n_devices);
dh::ExecuteIndexShards(&shards_, [&](int i, std::unique_ptr<DeviceShard>& shard) {
@@ -828,8 +827,8 @@ class GPUHistMaker : public TreeUpdater {
shard->InitCompressedData(hmat_, batch);
});
monitor_.Stop("BinningCompression", dist_.Devices());
CHECK(!iter->Next()) << "External memory not supported";
++batch_iter;
CHECK(batch_iter.AtEnd()) << "External memory not supported";
p_last_fmat_ = dmat;
initialised_ = true;

View File

@@ -344,10 +344,7 @@ class CQHistMaker: public HistMaker<TStats> {
{
thread_hist_.resize(omp_get_max_threads());
// start accumulating statistics
auto iter = p_fmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
// start enumeration
const auto nsize = static_cast<bst_omp_uint>(fset.size());
#pragma omp parallel for schedule(dynamic, 1)
@@ -426,10 +423,7 @@ class CQHistMaker: public HistMaker<TStats> {
work_set_.resize(std::unique(work_set_.begin(), work_set_.end()) - work_set_.begin());
// start accumulating statistics
auto iter = p_fmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
// TWOPASS: use the real set + split set in the column iteration.
this->CorrectNonDefaultPositionByBatch(batch, fsplit_set_, tree);
@@ -714,10 +708,7 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
std::unique(this->work_set_.begin(), this->work_set_.end()) - this->work_set_.begin());
// start accumulating statistics
auto iter = p_fmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
// TWOPASS: use the real set + split set in the column iteration.
this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set_, tree);
@@ -772,10 +763,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
sketchs_[i].Init(info.num_row_, this->param_.sketch_eps);
}
// start accumulating statistics
auto iter = p_fmat->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto &batch : p_fmat->GetRowBatches()) {
// parallel convert to column major format
common::ParallelGroupBuilder<Entry>
builder(&col_ptr_, &col_data_, &thread_col_ptr_);

View File

@@ -57,10 +57,7 @@ class TreeRefresher: public TreeUpdater {
{
const MetaInfo &info = p_fmat->Info();
// start accumulating statistics
auto *iter = p_fmat->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto &batch : p_fmat->GetRowBatches()) {
CHECK_LT(batch.Size(), std::numeric_limits<unsigned>::max());
const auto nbatch = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(static)

View File

@@ -142,12 +142,9 @@ class SketchMaker: public BaseMaker {
}
thread_sketch_.resize(omp_get_max_threads());
// number of rows in
const size_t nrows = p_fmat->BufferedRowset().Size();
const size_t nrows = p_fmat->Info().num_row_;
// start accumulating statistics
auto iter = p_fmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
auto &batch = iter->Value();
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
// start enumeration
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(dynamic, 1)