DMatrix refactor stage 2 (#3395)
* DMatrix refactor 2
* Remove buffered rowset usage where possible
* Transition to C++11 style iterators for row access
* Transition column iterators to C++11
This commit is contained in:
@@ -43,15 +43,14 @@ class BaseMaker: public TreeUpdater {
|
||||
std::fill(fminmax_.begin(), fminmax_.end(),
|
||||
-std::numeric_limits<bst_float>::max());
|
||||
// start accumulating statistics
|
||||
auto iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
|
||||
for (bst_uint fid = 0; fid < batch.Size(); ++fid) {
|
||||
auto c = batch[fid];
|
||||
auto c = batch[fid];
|
||||
if (c.size() != 0) {
|
||||
fminmax_[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]);
|
||||
fminmax_[fid * 2 + 1] = std::max(c[c.size() - 1].fvalue, fminmax_[fid * 2 + 1]);
|
||||
fminmax_[fid * 2 + 0] =
|
||||
std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]);
|
||||
fminmax_[fid * 2 + 1] =
|
||||
std::max(c[c.size() - 1].fvalue, fminmax_[fid * 2 + 1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -208,16 +207,13 @@ class BaseMaker: public TreeUpdater {
|
||||
*/
|
||||
inline void SetDefaultPostion(DMatrix *p_fmat,
|
||||
const RegTree &tree) {
|
||||
// set rest of instances to default position
|
||||
const RowSet &rowset = p_fmat->BufferedRowset();
|
||||
// set default direct nodes to default
|
||||
// for leaf nodes that are not fresh, mark them as ~nid,
|
||||
// so that they are ignored in future statistics collection
|
||||
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
|
||||
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
|
||||
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
if (tree[nid].IsLeaf()) {
|
||||
// mark finish when it is not a fresh leaf
|
||||
@@ -303,9 +299,7 @@ class BaseMaker: public TreeUpdater {
|
||||
const RegTree &tree) {
|
||||
std::vector<unsigned> fsplits;
|
||||
this->GetSplitSet(nodes, tree, &fsplits);
|
||||
auto iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
|
||||
for (auto fid : fsplits) {
|
||||
auto col = batch[fid];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.size());
|
||||
@@ -345,12 +339,10 @@ class BaseMaker: public TreeUpdater {
|
||||
thread_temp[tid][nid].Clear();
|
||||
}
|
||||
}
|
||||
const RowSet &rowset = fmat.BufferedRowset();
|
||||
// setup position
|
||||
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
|
||||
const auto ndata = static_cast<bst_omp_uint>(fmat.Info().num_row_);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
|
||||
const int nid = position_[ridx];
|
||||
const int tid = omp_get_thread_num();
|
||||
if (nid >= 0) {
|
||||
|
||||
@@ -141,32 +141,27 @@ class ColMaker: public TreeUpdater {
|
||||
CHECK_EQ(tree.param.num_nodes, tree.param.num_roots)
|
||||
<< "ColMaker: can only grow new tree";
|
||||
const std::vector<unsigned>& root_index = fmat.Info().root_index_;
|
||||
const RowSet& rowset = fmat.BufferedRowset();
|
||||
{
|
||||
// setup position
|
||||
position_.resize(gpair.size());
|
||||
CHECK_EQ(fmat.Info().num_row_, position_.size());
|
||||
if (root_index.size() == 0) {
|
||||
for (size_t i = 0; i < rowset.Size(); ++i) {
|
||||
position_[rowset[i]] = 0;
|
||||
}
|
||||
std::fill(position_.begin(), position_.end(), 0);
|
||||
} else {
|
||||
for (size_t i = 0; i < rowset.Size(); ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
for (size_t ridx = 0; ridx < position_.size(); ++ridx) {
|
||||
position_[ridx] = root_index[ridx];
|
||||
CHECK_LT(root_index[ridx], (unsigned)tree.param.num_roots);
|
||||
}
|
||||
}
|
||||
// mark the deleted data points (negative hessian) as deleted
|
||||
for (size_t i = 0; i < rowset.Size(); ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
for (size_t ridx = 0; ridx < position_.size(); ++ridx) {
|
||||
if (gpair[ridx].GetHess() < 0.0f) position_[ridx] = ~position_[ridx];
|
||||
}
|
||||
// mark subsample
|
||||
if (param_.subsample < 1.0f) {
|
||||
std::bernoulli_distribution coin_flip(param_.subsample);
|
||||
auto& rnd = common::GlobalRandom();
|
||||
for (size_t i = 0; i < rowset.Size(); ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
for (size_t ridx = 0; ridx < position_.size(); ++ridx) {
|
||||
if (gpair[ridx].GetHess() < 0.0f) continue;
|
||||
if (!coin_flip(rnd)) position_[ridx] = ~position_[ridx];
|
||||
}
|
||||
@@ -209,13 +204,11 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
snode_.resize(tree.param.num_nodes, NodeEntry(param_));
|
||||
}
|
||||
const RowSet &rowset = fmat.BufferedRowset();
|
||||
const MetaInfo& info = fmat.Info();
|
||||
// setup position
|
||||
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
|
||||
const auto ndata = static_cast<bst_omp_uint>(info.num_row_);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
|
||||
const int tid = omp_get_thread_num();
|
||||
if (position_[ridx] < 0) continue;
|
||||
stemp_[tid][position_[ridx]].stats.Add(gpair, info, ridx);
|
||||
@@ -254,13 +247,13 @@ class ColMaker: public TreeUpdater {
|
||||
// this function does not support nested functions
|
||||
inline void ParallelFindSplit(const SparsePage::Inst &col,
|
||||
bst_uint fid,
|
||||
const DMatrix &fmat,
|
||||
DMatrix *p_fmat,
|
||||
const std::vector<GradientPair> &gpair) {
|
||||
// TODO(tqchen): double check stats order.
|
||||
const MetaInfo& info = fmat.Info();
|
||||
const MetaInfo& info = p_fmat->Info();
|
||||
const bool ind = col.size() != 0 && col[0].fvalue == col[col.size() - 1].fvalue;
|
||||
bool need_forward = param_.NeedForwardSearch(fmat.GetColDensity(fid), ind);
|
||||
bool need_backward = param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind);
|
||||
bool need_forward = param_.NeedForwardSearch(p_fmat->GetColDensity(fid), ind);
|
||||
bool need_backward = param_.NeedBackwardSearch(p_fmat->GetColDensity(fid), ind);
|
||||
const std::vector<int> &qexpand = qexpand_;
|
||||
#pragma omp parallel
|
||||
{
|
||||
@@ -592,8 +585,8 @@ class ColMaker: public TreeUpdater {
|
||||
virtual void UpdateSolution(const SparsePage &batch,
|
||||
const std::vector<int> &feat_set,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
const DMatrix &fmat) {
|
||||
const MetaInfo& info = fmat.Info();
|
||||
DMatrix*p_fmat) {
|
||||
const MetaInfo& info = p_fmat->Info();
|
||||
// start enumeration
|
||||
const auto num_features = static_cast<bst_omp_uint>(feat_set.size());
|
||||
#if defined(_OPENMP)
|
||||
@@ -610,11 +603,11 @@ class ColMaker: public TreeUpdater {
|
||||
const int tid = omp_get_thread_num();
|
||||
auto c = batch[fid];
|
||||
const bool ind = c.size() != 0 && c[0].fvalue == c[c.size() - 1].fvalue;
|
||||
if (param_.NeedForwardSearch(fmat.GetColDensity(fid), ind)) {
|
||||
if (param_.NeedForwardSearch(p_fmat->GetColDensity(fid), ind)) {
|
||||
this->EnumerateSplit(c.data(), c.data() + c.size(), +1,
|
||||
fid, gpair, info, stemp_[tid]);
|
||||
}
|
||||
if (param_.NeedBackwardSearch(fmat.GetColDensity(fid), ind)) {
|
||||
if (param_.NeedBackwardSearch(p_fmat->GetColDensity(fid), ind)) {
|
||||
this->EnumerateSplit(c.data() + c.size() - 1, c.data() - 1, -1,
|
||||
fid, gpair, info, stemp_[tid]);
|
||||
}
|
||||
@@ -622,7 +615,7 @@ class ColMaker: public TreeUpdater {
|
||||
} else {
|
||||
for (bst_omp_uint fid = 0; fid < num_features; ++fid) {
|
||||
this->ParallelFindSplit(batch[fid], fid,
|
||||
fmat, gpair);
|
||||
p_fmat, gpair);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -633,9 +626,8 @@ class ColMaker: public TreeUpdater {
|
||||
DMatrix *p_fmat,
|
||||
RegTree *p_tree) {
|
||||
const std::vector<int> &feat_set = column_sampler_.GetFeatureSet(depth).HostVector();
|
||||
auto iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
this->UpdateSolution(iter->Value(), feat_set, gpair, *p_fmat);
|
||||
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
|
||||
this->UpdateSolution(batch, feat_set, gpair, p_fmat);
|
||||
}
|
||||
// after this each thread's stemp will get the best candidates, aggregate results
|
||||
this->SyncBestSolution(qexpand);
|
||||
@@ -661,15 +653,13 @@ class ColMaker: public TreeUpdater {
|
||||
// set the positions in the nondefault
|
||||
this->SetNonDefaultPosition(qexpand, p_fmat, tree);
|
||||
// set rest of instances to default position
|
||||
const RowSet &rowset = p_fmat->BufferedRowset();
|
||||
// set default direct nodes to default
|
||||
// for leaf nodes that are not fresh, mark them as ~nid,
|
||||
// so that they are ignored in future statistics collection
|
||||
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
|
||||
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
|
||||
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
|
||||
CHECK_LT(ridx, position_.size())
|
||||
<< "ridx exceed bound " << "ridx="<< ridx << " pos=" << position_.size();
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
@@ -710,9 +700,7 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
std::sort(fsplits.begin(), fsplits.end());
|
||||
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
|
||||
auto iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
|
||||
for (auto fid : fsplits) {
|
||||
auto col = batch[fid];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.size());
|
||||
@@ -798,11 +786,9 @@ class DistColMaker : public ColMaker {
|
||||
std::unique_ptr<SplitEvaluator> spliteval)
|
||||
: ColMaker::Builder(param, std::move(spliteval)) {}
|
||||
inline void UpdatePosition(DMatrix* p_fmat, const RegTree &tree) {
|
||||
const RowSet &rowset = p_fmat->BufferedRowset();
|
||||
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
|
||||
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
|
||||
int nid = this->DecodePosition(ridx);
|
||||
while (tree[nid].IsDeleted()) {
|
||||
nid = tree[nid].Parent();
|
||||
@@ -840,9 +826,7 @@ class DistColMaker : public ColMaker {
|
||||
boolmap_[j] = 0;
|
||||
}
|
||||
}
|
||||
auto iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
|
||||
for (auto fid : fsplits) {
|
||||
auto col = batch[fid];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.size());
|
||||
@@ -865,12 +849,10 @@ class DistColMaker : public ColMaker {
|
||||
bitmap_.InitFromBool(boolmap_);
|
||||
// communicate bitmap
|
||||
rabit::Allreduce<rabit::op::BitOR>(dmlc::BeginPtr(bitmap_.data), bitmap_.data.size());
|
||||
const RowSet &rowset = p_fmat->BufferedRowset();
|
||||
// get the new position
|
||||
const auto ndata = static_cast<bst_omp_uint>(rowset.Size());
|
||||
const auto ndata = static_cast<bst_omp_uint>(p_fmat->Info().num_row_);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < ndata; ++i) {
|
||||
const bst_uint ridx = rowset[i];
|
||||
for (bst_omp_uint ridx = 0; ridx < ndata; ++ridx) {
|
||||
const int nid = this->DecodePosition(ridx);
|
||||
if (bitmap_.Get(ridx)) {
|
||||
CHECK(!tree[nid].IsLeaf()) << "inconsistent reduce information";
|
||||
|
||||
@@ -661,19 +661,12 @@ class GPUMaker : public TreeUpdater {
|
||||
fId->reserve(nCols * nRows);
|
||||
// in case you end up with a DMatrix having no column access
|
||||
// then make sure to enable that before copying the data!
|
||||
if (!dmat->HaveColAccess(true)) {
|
||||
dmat->InitColAccess(nRows, true);
|
||||
}
|
||||
auto iter = dmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto& batch : dmat->GetSortedColumnBatches()) {
|
||||
for (int i = 0; i < batch.Size(); i++) {
|
||||
auto col = batch[i];
|
||||
for (const Entry* it = col.data(); it != col.data() + col.size();
|
||||
it++) {
|
||||
int inst_id = static_cast<int>(it->index);
|
||||
fval->push_back(it->fvalue);
|
||||
for (const Entry& e : col) {
|
||||
int inst_id = static_cast<int>(e.index);
|
||||
fval->push_back(e.fvalue);
|
||||
fId->push_back(inst_id);
|
||||
}
|
||||
offset->push_back(fval->size());
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include "../common/hist_util.h"
|
||||
#include "../common/host_device_vector.h"
|
||||
#include "../common/timer.h"
|
||||
#include "../common/common.h"
|
||||
#include "param.h"
|
||||
#include "updater_gpu_common.cuh"
|
||||
|
||||
@@ -803,10 +804,8 @@ class GPUHistMaker : public TreeUpdater {
|
||||
|
||||
reducer_.Init(device_list_);
|
||||
|
||||
dmlc::DataIter<SparsePage>* iter = dmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
CHECK(iter->Next()) << "Empty batches are not supported";
|
||||
const SparsePage& batch = iter->Value();
|
||||
auto batch_iter = dmat->GetRowBatches().begin();
|
||||
const SparsePage& batch = *batch_iter;
|
||||
// Create device shards
|
||||
shards_.resize(n_devices);
|
||||
dh::ExecuteIndexShards(&shards_, [&](int i, std::unique_ptr<DeviceShard>& shard) {
|
||||
@@ -828,8 +827,8 @@ class GPUHistMaker : public TreeUpdater {
|
||||
shard->InitCompressedData(hmat_, batch);
|
||||
});
|
||||
monitor_.Stop("BinningCompression", dist_.Devices());
|
||||
|
||||
CHECK(!iter->Next()) << "External memory not supported";
|
||||
++batch_iter;
|
||||
CHECK(batch_iter.AtEnd()) << "External memory not supported";
|
||||
|
||||
p_last_fmat_ = dmat;
|
||||
initialised_ = true;
|
||||
|
||||
@@ -344,10 +344,7 @@ class CQHistMaker: public HistMaker<TStats> {
|
||||
{
|
||||
thread_hist_.resize(omp_get_max_threads());
|
||||
// start accumulating statistics
|
||||
auto iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
|
||||
// start enumeration
|
||||
const auto nsize = static_cast<bst_omp_uint>(fset.size());
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
@@ -426,10 +423,7 @@ class CQHistMaker: public HistMaker<TStats> {
|
||||
work_set_.resize(std::unique(work_set_.begin(), work_set_.end()) - work_set_.begin());
|
||||
|
||||
// start accumulating statistics
|
||||
auto iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
|
||||
// TWOPASS: use the real set + split set in the column iteration.
|
||||
this->CorrectNonDefaultPositionByBatch(batch, fsplit_set_, tree);
|
||||
|
||||
@@ -714,10 +708,7 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
|
||||
std::unique(this->work_set_.begin(), this->work_set_.end()) - this->work_set_.begin());
|
||||
|
||||
// start accumulating statistics
|
||||
auto iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
|
||||
// TWOPASS: use the real set + split set in the column iteration.
|
||||
this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set_, tree);
|
||||
|
||||
@@ -772,10 +763,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
|
||||
sketchs_[i].Init(info.num_row_, this->param_.sketch_eps);
|
||||
}
|
||||
// start accumulating statistics
|
||||
auto iter = p_fmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto &batch : p_fmat->GetRowBatches()) {
|
||||
// parallel convert to column major format
|
||||
common::ParallelGroupBuilder<Entry>
|
||||
builder(&col_ptr_, &col_data_, &thread_col_ptr_);
|
||||
|
||||
@@ -57,10 +57,7 @@ class TreeRefresher: public TreeUpdater {
|
||||
{
|
||||
const MetaInfo &info = p_fmat->Info();
|
||||
// start accumulating statistics
|
||||
auto *iter = p_fmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto &batch : p_fmat->GetRowBatches()) {
|
||||
CHECK_LT(batch.Size(), std::numeric_limits<unsigned>::max());
|
||||
const auto nbatch = static_cast<bst_omp_uint>(batch.Size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
|
||||
@@ -142,12 +142,9 @@ class SketchMaker: public BaseMaker {
|
||||
}
|
||||
thread_sketch_.resize(omp_get_max_threads());
|
||||
// number of rows in the data matrix
|
||||
const size_t nrows = p_fmat->BufferedRowset().Size();
|
||||
const size_t nrows = p_fmat->Info().num_row_;
|
||||
// start accumulating statistics
|
||||
auto iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
auto &batch = iter->Value();
|
||||
for (const auto &batch : p_fmat->GetSortedColumnBatches()) {
|
||||
// start enumeration
|
||||
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
|
||||
Reference in New Issue
Block a user