Dmatrix refactor stage 1 (#3301)
* Use sparse page as singular CSR matrix representation * Simplify dmatrix methods * Reduce statefulness of batch iterators * BREAKING CHANGE: Remove prob_buffer_row parameter. Users are instead recommended to sample their dataset as a preprocessing step before using XGBoost.
This commit is contained in:
@@ -43,13 +43,12 @@ class BaseMaker: public TreeUpdater {
|
||||
std::fill(fminmax_.begin(), fminmax_.end(),
|
||||
-std::numeric_limits<bst_float>::max());
|
||||
// start accumulating statistics
|
||||
dmlc::DataIter<ColBatch>* iter = p_fmat->ColIterator();
|
||||
auto iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch& batch = iter->Value();
|
||||
for (bst_uint i = 0; i < batch.size; ++i) {
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
const ColBatch::Inst& c = batch[i];
|
||||
auto batch = iter->Value();
|
||||
for (bst_uint fid = 0; fid < batch.Size(); ++fid) {
|
||||
auto c = batch[fid];
|
||||
if (c.length != 0) {
|
||||
fminmax_[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]);
|
||||
fminmax_[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax_[fid * 2 + 1]);
|
||||
@@ -104,7 +103,7 @@ class BaseMaker: public TreeUpdater {
|
||||
// ------static helper functions ------
|
||||
// helper function to get to next level of the tree
|
||||
/*! \brief this is helper function for row based data*/
|
||||
inline static int NextLevel(const RowBatch::Inst &inst, const RegTree &tree, int nid) {
|
||||
inline static int NextLevel(const SparsePage::Inst &inst, const RegTree &tree, int nid) {
|
||||
const RegTree::Node &n = tree[nid];
|
||||
bst_uint findex = n.SplitIndex();
|
||||
for (unsigned i = 0; i < inst.length; ++i) {
|
||||
@@ -244,12 +243,10 @@ class BaseMaker: public TreeUpdater {
|
||||
* \param tree the regression tree structure
|
||||
*/
|
||||
inline void CorrectNonDefaultPositionByBatch(
|
||||
const ColBatch& batch,
|
||||
const std::vector<bst_uint> &sorted_split_set,
|
||||
const SparsePage &batch, const std::vector<bst_uint> &sorted_split_set,
|
||||
const RegTree &tree) {
|
||||
for (size_t i = 0; i < batch.size; ++i) {
|
||||
ColBatch::Inst col = batch[i];
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
for (size_t fid = 0; fid < batch.Size(); ++fid) {
|
||||
auto col = batch[fid];
|
||||
auto it = std::lower_bound(sorted_split_set.begin(), sorted_split_set.end(), fid);
|
||||
|
||||
if (it != sorted_split_set.end() && *it == fid) {
|
||||
@@ -306,12 +303,11 @@ class BaseMaker: public TreeUpdater {
|
||||
const RegTree &tree) {
|
||||
std::vector<unsigned> fsplits;
|
||||
this->GetSplitSet(nodes, tree, &fsplits);
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fsplits);
|
||||
auto iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
for (size_t i = 0; i < batch.size; ++i) {
|
||||
ColBatch::Inst col = batch[i];
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
auto batch = iter->Value();
|
||||
for (auto fid : fsplits) {
|
||||
auto col = batch[fid];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.length);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
|
||||
@@ -252,7 +252,7 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
// parallel find the best split of current fid
|
||||
// this function does not support nested functions
|
||||
inline void ParallelFindSplit(const ColBatch::Inst &col,
|
||||
inline void ParallelFindSplit(const SparsePage::Inst &col,
|
||||
bst_uint fid,
|
||||
const DMatrix &fmat,
|
||||
const std::vector<GradientPair> &gpair) {
|
||||
@@ -439,8 +439,8 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
}
|
||||
// same as EnumerateSplit, with cacheline prefetch optimization
|
||||
inline void EnumerateSplitCacheOpt(const ColBatch::Entry *begin,
|
||||
const ColBatch::Entry *end,
|
||||
inline void EnumerateSplitCacheOpt(const Entry *begin,
|
||||
const Entry *end,
|
||||
int d_step,
|
||||
bst_uint fid,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
@@ -457,18 +457,18 @@ class ColMaker: public TreeUpdater {
|
||||
int buf_position[kBuffer] = {};
|
||||
GradientPair buf_gpair[kBuffer] = {};
|
||||
// aligned ending position
|
||||
const ColBatch::Entry *align_end;
|
||||
const Entry *align_end;
|
||||
if (d_step > 0) {
|
||||
align_end = begin + (end - begin) / kBuffer * kBuffer;
|
||||
} else {
|
||||
align_end = begin - (begin - end) / kBuffer * kBuffer;
|
||||
}
|
||||
int i;
|
||||
const ColBatch::Entry *it;
|
||||
const Entry *it;
|
||||
const int align_step = d_step * kBuffer;
|
||||
// internal cached loop
|
||||
for (it = begin; it != align_end; it += align_step) {
|
||||
const ColBatch::Entry *p;
|
||||
const Entry *p;
|
||||
for (i = 0, p = it; i < kBuffer; ++i, p += d_step) {
|
||||
buf_position[i] = position_[p->index];
|
||||
buf_gpair[i] = gpair[p->index];
|
||||
@@ -519,8 +519,8 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
|
||||
// enumerate the split values of specific feature
|
||||
inline void EnumerateSplit(const ColBatch::Entry *begin,
|
||||
const ColBatch::Entry *end,
|
||||
inline void EnumerateSplit(const Entry *begin,
|
||||
const Entry *end,
|
||||
int d_step,
|
||||
bst_uint fid,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
@@ -538,7 +538,7 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
// left statistics
|
||||
TStats c(param_);
|
||||
for (const ColBatch::Entry *it = begin; it != end; it += d_step) {
|
||||
for (const Entry *it = begin; it != end; it += d_step) {
|
||||
const bst_uint ridx = it->index;
|
||||
const int nid = position_[ridx];
|
||||
if (nid < 0) continue;
|
||||
@@ -602,25 +602,26 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
|
||||
// update the solution candidate
|
||||
virtual void UpdateSolution(const ColBatch& batch,
|
||||
const std::vector<GradientPair>& gpair,
|
||||
const DMatrix& fmat) {
|
||||
virtual void UpdateSolution(const SparsePage &batch,
|
||||
const std::vector<bst_uint> &feat_set,
|
||||
const std::vector<GradientPair> &gpair,
|
||||
const DMatrix &fmat) {
|
||||
const MetaInfo& info = fmat.Info();
|
||||
// start enumeration
|
||||
const auto nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
const auto num_features = static_cast<bst_omp_uint>(feat_set.size());
|
||||
#if defined(_OPENMP)
|
||||
const int batch_size = std::max(static_cast<int>(nsize / this->nthread_ / 32), 1);
|
||||
const int batch_size = std::max(static_cast<int>(num_features / this->nthread_ / 32), 1);
|
||||
#endif
|
||||
int poption = param_.parallel_option;
|
||||
if (poption == 2) {
|
||||
poption = static_cast<int>(nsize) * 2 < this->nthread_ ? 1 : 0;
|
||||
poption = static_cast<int>(num_features) * 2 < this->nthread_ ? 1 : 0;
|
||||
}
|
||||
if (poption == 0) {
|
||||
#pragma omp parallel for schedule(dynamic, batch_size)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
for (bst_omp_uint i = 0; i < num_features; ++i) {
|
||||
int fid = feat_set[i];
|
||||
const int tid = omp_get_thread_num();
|
||||
const ColBatch::Inst c = batch[i];
|
||||
auto c = batch[fid];
|
||||
const bool ind = c.length != 0 && c.data[0].fvalue == c.data[c.length - 1].fvalue;
|
||||
if (param_.NeedForwardSearch(fmat.GetColDensity(fid), ind)) {
|
||||
this->EnumerateSplit(c.data, c.data + c.length, +1,
|
||||
@@ -632,8 +633,8 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
this->ParallelFindSplit(batch[i], batch.col_index[i],
|
||||
for (bst_omp_uint fid = 0; fid < num_features; ++fid) {
|
||||
this->ParallelFindSplit(batch[fid], fid,
|
||||
fmat, gpair);
|
||||
}
|
||||
}
|
||||
@@ -653,9 +654,9 @@ class ColMaker: public TreeUpdater {
|
||||
<< "colsample_bylevel cannot be zero.";
|
||||
feat_set.resize(n);
|
||||
}
|
||||
dmlc::DataIter<ColBatch>* iter = p_fmat->ColIterator(feat_set);
|
||||
auto iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
this->UpdateSolution(iter->Value(), gpair, *p_fmat);
|
||||
this->UpdateSolution(iter->Value(), feat_set, gpair, *p_fmat);
|
||||
}
|
||||
// after this each thread's stemp will get the best candidates, aggregate results
|
||||
this->SyncBestSolution(qexpand);
|
||||
@@ -730,12 +731,11 @@ class ColMaker: public TreeUpdater {
|
||||
}
|
||||
std::sort(fsplits.begin(), fsplits.end());
|
||||
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fsplits);
|
||||
auto iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
for (size_t i = 0; i < batch.size; ++i) {
|
||||
ColBatch::Inst col = batch[i];
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
auto batch = iter->Value();
|
||||
for (auto fid : fsplits) {
|
||||
auto col = batch[fid];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.length);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
@@ -859,12 +859,11 @@ class DistColMaker : public ColMaker<TStats, TConstraint> {
|
||||
boolmap_[j] = 0;
|
||||
}
|
||||
}
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fsplits);
|
||||
auto iter = p_fmat->ColIterator();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
for (size_t i = 0; i < batch.size; ++i) {
|
||||
ColBatch::Inst col = batch[i];
|
||||
const bst_uint fid = batch.col_index[i];
|
||||
auto batch = iter->Value();
|
||||
for (auto fid : fsplits) {
|
||||
auto col = batch[fid];
|
||||
const auto ndata = static_cast<bst_omp_uint>(col.length);
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint j = 0; j < ndata; ++j) {
|
||||
|
||||
@@ -661,16 +661,15 @@ class GPUMaker : public TreeUpdater {
|
||||
// in case you end up with a DMatrix having no column access
|
||||
// then make sure to enable that before copying the data!
|
||||
if (!dmat->HaveColAccess(true)) {
|
||||
const std::vector<bool> enable(nCols, true);
|
||||
dmat->InitColAccess(enable, 1, nRows, true);
|
||||
dmat->InitColAccess(nRows, true);
|
||||
}
|
||||
dmlc::DataIter<ColBatch>* iter = dmat->ColIterator();
|
||||
auto iter = dmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch& batch = iter->Value();
|
||||
for (int i = 0; i < batch.size; i++) {
|
||||
const ColBatch::Inst& col = batch[i];
|
||||
for (const ColBatch::Entry* it = col.data; it != col.data + col.length;
|
||||
auto batch = iter->Value();
|
||||
for (int i = 0; i < batch.Size(); i++) {
|
||||
auto col = batch[i];
|
||||
for (const Entry* it = col.data; it != col.data + col.length;
|
||||
it++) {
|
||||
int inst_id = static_cast<int>(it->index);
|
||||
fval->push_back(it->fvalue);
|
||||
|
||||
@@ -250,7 +250,7 @@ __device__ int upper_bound(const float* __restrict__ cuts, int n, float v) {
|
||||
__global__ void compress_bin_ellpack_k
|
||||
(common::CompressedBufferWriter wr, common::CompressedByteT* __restrict__ buffer,
|
||||
const size_t* __restrict__ row_ptrs,
|
||||
const RowBatch::Entry* __restrict__ entries,
|
||||
const Entry* __restrict__ entries,
|
||||
const float* __restrict__ cuts, const size_t* __restrict__ cut_rows,
|
||||
size_t base_row, size_t n_rows, size_t row_ptr_begin, size_t row_stride,
|
||||
unsigned int null_gidx_value) {
|
||||
@@ -261,7 +261,7 @@ __global__ void compress_bin_ellpack_k
|
||||
int row_size = static_cast<int>(row_ptrs[irow + 1] - row_ptrs[irow]);
|
||||
unsigned int bin = null_gidx_value;
|
||||
if (ifeature < row_size) {
|
||||
RowBatch::Entry entry = entries[row_ptrs[irow] - row_ptr_begin + ifeature];
|
||||
Entry entry = entries[row_ptrs[irow] - row_ptr_begin + ifeature];
|
||||
int feature = entry.index;
|
||||
float fvalue = entry.fvalue;
|
||||
const float *feature_cuts = &cuts[cut_rows[feature]];
|
||||
@@ -332,7 +332,7 @@ struct DeviceShard {
|
||||
param(param),
|
||||
prediction_cache_initialised(false) {}
|
||||
|
||||
void Init(const common::HistCutMatrix& hmat, const RowBatch& row_batch) {
|
||||
void Init(const common::HistCutMatrix& hmat, const SparsePage& row_batch) {
|
||||
// copy cuts to the GPU
|
||||
dh::safe_cuda(cudaSetDevice(device_idx));
|
||||
thrust::device_vector<float> cuts_d(hmat.cut);
|
||||
@@ -340,7 +340,7 @@ struct DeviceShard {
|
||||
|
||||
// find the maximum row size
|
||||
thrust::device_vector<size_t> row_ptr_d(
|
||||
row_batch.ind_ptr + row_begin_idx, row_batch.ind_ptr + row_end_idx + 1);
|
||||
&row_batch.offset[row_begin_idx], &row_batch.offset[row_end_idx + 1]);
|
||||
|
||||
auto row_iter = row_ptr_d.begin();
|
||||
auto get_size = [=] __device__(size_t row) {
|
||||
@@ -369,11 +369,11 @@ struct DeviceShard {
|
||||
// bin and compress entries in batches of rows
|
||||
// use no more than 1/16th of GPU memory per batch
|
||||
size_t gpu_batch_nrows = dh::TotalMemory(device_idx) /
|
||||
(16 * row_stride * sizeof(RowBatch::Entry));
|
||||
(16 * row_stride * sizeof(Entry));
|
||||
if (gpu_batch_nrows > n_rows) {
|
||||
gpu_batch_nrows = n_rows;
|
||||
}
|
||||
thrust::device_vector<RowBatch::Entry> entries_d(gpu_batch_nrows * row_stride);
|
||||
thrust::device_vector<Entry> entries_d(gpu_batch_nrows * row_stride);
|
||||
size_t gpu_nbatches = dh::DivRoundUp(n_rows, gpu_batch_nrows);
|
||||
for (size_t gpu_batch = 0; gpu_batch < gpu_nbatches; ++gpu_batch) {
|
||||
size_t batch_row_begin = gpu_batch * gpu_batch_nrows;
|
||||
@@ -383,13 +383,13 @@ struct DeviceShard {
|
||||
}
|
||||
size_t batch_nrows = batch_row_end - batch_row_begin;
|
||||
size_t n_entries =
|
||||
row_batch.ind_ptr[row_begin_idx + batch_row_end] -
|
||||
row_batch.ind_ptr[row_begin_idx + batch_row_begin];
|
||||
row_batch.offset[row_begin_idx + batch_row_end] -
|
||||
row_batch.offset[row_begin_idx + batch_row_begin];
|
||||
dh::safe_cuda
|
||||
(cudaMemcpy
|
||||
(entries_d.data().get(),
|
||||
&row_batch.data_ptr[row_batch.ind_ptr[row_begin_idx + batch_row_begin]],
|
||||
n_entries * sizeof(RowBatch::Entry), cudaMemcpyDefault));
|
||||
&row_batch.data[row_batch.offset[row_begin_idx + batch_row_begin]],
|
||||
n_entries * sizeof(Entry), cudaMemcpyDefault));
|
||||
dim3 block3(32, 8, 1);
|
||||
dim3 grid3(dh::DivRoundUp(n_rows, block3.x),
|
||||
dh::DivRoundUp(row_stride, block3.y), 1);
|
||||
@@ -398,7 +398,7 @@ struct DeviceShard {
|
||||
row_ptr_d.data().get() + batch_row_begin,
|
||||
entries_d.data().get(), cuts_d.data().get(), cut_row_ptrs_d.data().get(),
|
||||
batch_row_begin, batch_nrows,
|
||||
row_batch.ind_ptr[row_begin_idx + batch_row_begin],
|
||||
row_batch.offset[row_begin_idx + batch_row_begin],
|
||||
row_stride, null_gidx_value);
|
||||
|
||||
dh::safe_cuda(cudaGetLastError());
|
||||
@@ -702,10 +702,10 @@ class GPUHistMaker : public TreeUpdater {
|
||||
|
||||
monitor_.Start("BinningCompression", device_list_);
|
||||
{
|
||||
dmlc::DataIter<RowBatch>* iter = dmat->RowIterator();
|
||||
dmlc::DataIter<SparsePage>* iter = dmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
CHECK(iter->Next()) << "Empty batches are not supported";
|
||||
const RowBatch& batch = iter->Value();
|
||||
const SparsePage& batch = iter->Value();
|
||||
// Create device shards
|
||||
dh::ExecuteIndexShards(&shards_, [&](int i, std::unique_ptr<DeviceShard>& shard) {
|
||||
shard = std::unique_ptr<DeviceShard>
|
||||
|
||||
@@ -344,17 +344,18 @@ class CQHistMaker: public HistMaker<TStats> {
|
||||
{
|
||||
thread_hist_.resize(omp_get_max_threads());
|
||||
// start accumulating statistics
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fset);
|
||||
auto iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
auto batch = iter->Value();
|
||||
// start enumeration
|
||||
const auto nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
const auto nsize = static_cast<bst_omp_uint>(fset.size());
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
int offset = feat2workindex_[batch.col_index[i]];
|
||||
int fid = fset[i];
|
||||
int offset = feat2workindex_[fid];
|
||||
if (offset >= 0) {
|
||||
this->UpdateHistCol(gpair, batch[i], info, tree,
|
||||
this->UpdateHistCol(gpair, batch[fid], info, tree,
|
||||
fset, offset,
|
||||
&thread_hist_[omp_get_thread_num()]);
|
||||
}
|
||||
@@ -425,20 +426,20 @@ class CQHistMaker: public HistMaker<TStats> {
|
||||
work_set_.resize(std::unique(work_set_.begin(), work_set_.end()) - work_set_.begin());
|
||||
|
||||
// start accumulating statistics
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(work_set_);
|
||||
auto iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
auto batch = iter->Value();
|
||||
// TWOPASS: use the real set + split set in the column iteration.
|
||||
this->CorrectNonDefaultPositionByBatch(batch, fsplit_set_, tree);
|
||||
|
||||
// start enumeration
|
||||
const auto nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
int offset = feat2workindex_[batch.col_index[i]];
|
||||
for (bst_omp_uint fid = 0; fid < nsize; ++fid) {
|
||||
int offset = feat2workindex_[fid];
|
||||
if (offset >= 0) {
|
||||
this->UpdateSketchCol(gpair, batch[i], tree,
|
||||
this->UpdateSketchCol(gpair, batch[fid], tree,
|
||||
work_set_size, offset,
|
||||
&thread_sketch_[omp_get_thread_num()]);
|
||||
}
|
||||
@@ -494,7 +495,7 @@ class CQHistMaker: public HistMaker<TStats> {
|
||||
}
|
||||
|
||||
inline void UpdateHistCol(const std::vector<GradientPair> &gpair,
|
||||
const ColBatch::Inst &c,
|
||||
const SparsePage::Inst &c,
|
||||
const MetaInfo &info,
|
||||
const RegTree &tree,
|
||||
const std::vector<bst_uint> &fset,
|
||||
@@ -546,7 +547,7 @@ class CQHistMaker: public HistMaker<TStats> {
|
||||
}
|
||||
}
|
||||
inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
|
||||
const ColBatch::Inst &c,
|
||||
const SparsePage::Inst &c,
|
||||
const RegTree &tree,
|
||||
size_t work_set_size,
|
||||
bst_uint offset,
|
||||
@@ -712,18 +713,18 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
|
||||
std::unique(this->work_set_.begin(), this->work_set_.end()) - this->work_set_.begin());
|
||||
|
||||
// start accumulating statistics
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(this->work_set_);
|
||||
auto iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
auto batch = iter->Value();
|
||||
// TWOPASS: use the real set + split set in the column iteration.
|
||||
this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set_, tree);
|
||||
|
||||
// start enumeration
|
||||
const auto nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
const auto nsize = static_cast<bst_omp_uint>(this->work_set_.size());
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
int offset = this->feat2workindex_[batch.col_index[i]];
|
||||
int offset = this->feat2workindex_[this->work_set_[i]];
|
||||
if (offset >= 0) {
|
||||
this->UpdateHistCol(gpair, batch[i], info, tree,
|
||||
fset, offset,
|
||||
@@ -769,19 +770,19 @@ class QuantileHistMaker: public HistMaker<TStats> {
|
||||
sketchs_[i].Init(info.num_row_, this->param_.sketch_eps);
|
||||
}
|
||||
// start accumulating statistics
|
||||
dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
|
||||
auto iter = p_fmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const RowBatch &batch = iter->Value();
|
||||
auto batch = iter->Value();
|
||||
// parallel convert to column major format
|
||||
common::ParallelGroupBuilder<SparseBatch::Entry>
|
||||
common::ParallelGroupBuilder<Entry>
|
||||
builder(&col_ptr_, &col_data_, &thread_col_ptr_);
|
||||
builder.InitBudget(tree.param.num_feature, nthread);
|
||||
|
||||
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
|
||||
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.Size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nbatch; ++i) {
|
||||
RowBatch::Inst inst = batch[i];
|
||||
SparsePage::Inst inst = batch[i];
|
||||
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
|
||||
int nid = this->position_[ridx];
|
||||
if (nid >= 0) {
|
||||
@@ -800,13 +801,13 @@ class QuantileHistMaker: public HistMaker<TStats> {
|
||||
builder.InitStorage();
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nbatch; ++i) {
|
||||
RowBatch::Inst inst = batch[i];
|
||||
SparsePage::Inst inst = batch[i];
|
||||
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
|
||||
const int nid = this->position_[ridx];
|
||||
if (nid >= 0) {
|
||||
for (bst_uint j = 0; j < inst.length; ++j) {
|
||||
builder.Push(inst[j].index,
|
||||
SparseBatch::Entry(nid, inst[j].fvalue),
|
||||
Entry(nid, inst[j].fvalue),
|
||||
omp_get_thread_num());
|
||||
}
|
||||
}
|
||||
@@ -816,7 +817,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint k = 0; k < nfeat; ++k) {
|
||||
for (size_t i = col_ptr_[k]; i < col_ptr_[k+1]; ++i) {
|
||||
const SparseBatch::Entry &e = col_data_[i];
|
||||
const Entry &e = col_data_[i];
|
||||
const int wid = this->node2workindex_[e.index];
|
||||
sketchs_[wid * tree.param.num_feature + k].Push(e.fvalue, gpair[e.index].GetHess());
|
||||
}
|
||||
@@ -873,7 +874,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
|
||||
// local temp column data structure
|
||||
std::vector<size_t> col_ptr_;
|
||||
// local storage of column data
|
||||
std::vector<SparseBatch::Entry> col_data_;
|
||||
std::vector<Entry> col_data_;
|
||||
std::vector<std::vector<size_t> > thread_col_ptr_;
|
||||
// per node, per feature sketch
|
||||
std::vector<common::WQuantileSketch<bst_float, bst_float> > sketchs_;
|
||||
|
||||
@@ -57,15 +57,15 @@ class TreeRefresher: public TreeUpdater {
|
||||
{
|
||||
const MetaInfo &info = p_fmat->Info();
|
||||
// start accumulating statistics
|
||||
dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
|
||||
auto *iter = p_fmat->RowIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const RowBatch &batch = iter->Value();
|
||||
CHECK_LT(batch.size, std::numeric_limits<unsigned>::max());
|
||||
const auto nbatch = static_cast<bst_omp_uint>(batch.size);
|
||||
auto batch = iter->Value();
|
||||
CHECK_LT(batch.Size(), std::numeric_limits<unsigned>::max());
|
||||
const auto nbatch = static_cast<bst_omp_uint>(batch.Size());
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (bst_omp_uint i = 0; i < nbatch; ++i) {
|
||||
RowBatch::Inst inst = batch[i];
|
||||
SparsePage::Inst inst = batch[i];
|
||||
const int tid = omp_get_thread_num();
|
||||
const auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
|
||||
RegTree::FVec &feats = fvec_temp[tid];
|
||||
|
||||
@@ -144,18 +144,18 @@ class SketchMaker: public BaseMaker {
|
||||
// number of rows in
|
||||
const size_t nrows = p_fmat->BufferedRowset().Size();
|
||||
// start accumulating statistics
|
||||
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
|
||||
auto iter = p_fmat->ColIterator();
|
||||
iter->BeforeFirst();
|
||||
while (iter->Next()) {
|
||||
const ColBatch &batch = iter->Value();
|
||||
auto batch = iter->Value();
|
||||
// start enumeration
|
||||
const auto nsize = static_cast<bst_omp_uint>(batch.size);
|
||||
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
|
||||
#pragma omp parallel for schedule(dynamic, 1)
|
||||
for (bst_omp_uint i = 0; i < nsize; ++i) {
|
||||
this->UpdateSketchCol(gpair, batch[i], tree,
|
||||
for (bst_omp_uint fidx = 0; fidx < nsize; ++fidx) {
|
||||
this->UpdateSketchCol(gpair, batch[fidx], tree,
|
||||
node_stats_,
|
||||
batch.col_index[i],
|
||||
batch[i].length == nrows,
|
||||
fidx,
|
||||
batch[fidx].length == nrows,
|
||||
&thread_sketch_[omp_get_thread_num()]);
|
||||
}
|
||||
}
|
||||
@@ -174,7 +174,7 @@ class SketchMaker: public BaseMaker {
|
||||
}
|
||||
// update sketch information in column fid
|
||||
inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
|
||||
const ColBatch::Inst &c,
|
||||
const SparsePage::Inst &c,
|
||||
const RegTree &tree,
|
||||
const std::vector<SKStats> &nstats,
|
||||
bst_uint fid,
|
||||
|
||||
Reference in New Issue
Block a user