DMatrix refactor stage 1 (#3301)

* Use sparse page as singular CSR matrix representation

* Simplify DMatrix methods

* Reduce statefulness of batch iterators

* BREAKING CHANGE: Remove the prob_buffer_row parameter. Users are instead advised to subsample their dataset as a preprocessing step before using XGBoost.
This commit is contained in:
Rory Mitchell
2018-06-07 10:25:58 +12:00
committed by GitHub
parent 286dccb8e8
commit a96039141a
47 changed files with 650 additions and 1036 deletions

View File

@@ -43,13 +43,12 @@ class BaseMaker: public TreeUpdater {
std::fill(fminmax_.begin(), fminmax_.end(),
-std::numeric_limits<bst_float>::max());
// start accumulating statistics
dmlc::DataIter<ColBatch>* iter = p_fmat->ColIterator();
auto iter = p_fmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
const ColBatch& batch = iter->Value();
for (bst_uint i = 0; i < batch.size; ++i) {
const bst_uint fid = batch.col_index[i];
const ColBatch::Inst& c = batch[i];
auto batch = iter->Value();
for (bst_uint fid = 0; fid < batch.Size(); ++fid) {
auto c = batch[fid];
if (c.length != 0) {
fminmax_[fid * 2 + 0] = std::max(-c[0].fvalue, fminmax_[fid * 2 + 0]);
fminmax_[fid * 2 + 1] = std::max(c[c.length - 1].fvalue, fminmax_[fid * 2 + 1]);
@@ -104,7 +103,7 @@ class BaseMaker: public TreeUpdater {
// ------static helper functions ------
// helper function to get to next level of the tree
/*! \brief this is helper function for row based data*/
inline static int NextLevel(const RowBatch::Inst &inst, const RegTree &tree, int nid) {
inline static int NextLevel(const SparsePage::Inst &inst, const RegTree &tree, int nid) {
const RegTree::Node &n = tree[nid];
bst_uint findex = n.SplitIndex();
for (unsigned i = 0; i < inst.length; ++i) {
@@ -244,12 +243,10 @@ class BaseMaker: public TreeUpdater {
* \param tree the regression tree structure
*/
inline void CorrectNonDefaultPositionByBatch(
const ColBatch& batch,
const std::vector<bst_uint> &sorted_split_set,
const SparsePage &batch, const std::vector<bst_uint> &sorted_split_set,
const RegTree &tree) {
for (size_t i = 0; i < batch.size; ++i) {
ColBatch::Inst col = batch[i];
const bst_uint fid = batch.col_index[i];
for (size_t fid = 0; fid < batch.Size(); ++fid) {
auto col = batch[fid];
auto it = std::lower_bound(sorted_split_set.begin(), sorted_split_set.end(), fid);
if (it != sorted_split_set.end() && *it == fid) {
@@ -306,12 +303,11 @@ class BaseMaker: public TreeUpdater {
const RegTree &tree) {
std::vector<unsigned> fsplits;
this->GetSplitSet(nodes, tree, &fsplits);
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fsplits);
auto iter = p_fmat->ColIterator();
while (iter->Next()) {
const ColBatch &batch = iter->Value();
for (size_t i = 0; i < batch.size; ++i) {
ColBatch::Inst col = batch[i];
const bst_uint fid = batch.col_index[i];
auto batch = iter->Value();
for (auto fid : fsplits) {
auto col = batch[fid];
const auto ndata = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {

View File

@@ -252,7 +252,7 @@ class ColMaker: public TreeUpdater {
}
// parallel find the best split of current fid
// this function does not support nested functions
inline void ParallelFindSplit(const ColBatch::Inst &col,
inline void ParallelFindSplit(const SparsePage::Inst &col,
bst_uint fid,
const DMatrix &fmat,
const std::vector<GradientPair> &gpair) {
@@ -439,8 +439,8 @@ class ColMaker: public TreeUpdater {
}
}
// same as EnumerateSplit, with cacheline prefetch optimization
inline void EnumerateSplitCacheOpt(const ColBatch::Entry *begin,
const ColBatch::Entry *end,
inline void EnumerateSplitCacheOpt(const Entry *begin,
const Entry *end,
int d_step,
bst_uint fid,
const std::vector<GradientPair> &gpair,
@@ -457,18 +457,18 @@ class ColMaker: public TreeUpdater {
int buf_position[kBuffer] = {};
GradientPair buf_gpair[kBuffer] = {};
// aligned ending position
const ColBatch::Entry *align_end;
const Entry *align_end;
if (d_step > 0) {
align_end = begin + (end - begin) / kBuffer * kBuffer;
} else {
align_end = begin - (begin - end) / kBuffer * kBuffer;
}
int i;
const ColBatch::Entry *it;
const Entry *it;
const int align_step = d_step * kBuffer;
// internal cached loop
for (it = begin; it != align_end; it += align_step) {
const ColBatch::Entry *p;
const Entry *p;
for (i = 0, p = it; i < kBuffer; ++i, p += d_step) {
buf_position[i] = position_[p->index];
buf_gpair[i] = gpair[p->index];
@@ -519,8 +519,8 @@ class ColMaker: public TreeUpdater {
}
// enumerate the split values of specific feature
inline void EnumerateSplit(const ColBatch::Entry *begin,
const ColBatch::Entry *end,
inline void EnumerateSplit(const Entry *begin,
const Entry *end,
int d_step,
bst_uint fid,
const std::vector<GradientPair> &gpair,
@@ -538,7 +538,7 @@ class ColMaker: public TreeUpdater {
}
// left statistics
TStats c(param_);
for (const ColBatch::Entry *it = begin; it != end; it += d_step) {
for (const Entry *it = begin; it != end; it += d_step) {
const bst_uint ridx = it->index;
const int nid = position_[ridx];
if (nid < 0) continue;
@@ -602,25 +602,26 @@ class ColMaker: public TreeUpdater {
}
// update the solution candidate
virtual void UpdateSolution(const ColBatch& batch,
const std::vector<GradientPair>& gpair,
const DMatrix& fmat) {
virtual void UpdateSolution(const SparsePage &batch,
const std::vector<bst_uint> &feat_set,
const std::vector<GradientPair> &gpair,
const DMatrix &fmat) {
const MetaInfo& info = fmat.Info();
// start enumeration
const auto nsize = static_cast<bst_omp_uint>(batch.size);
const auto num_features = static_cast<bst_omp_uint>(feat_set.size());
#if defined(_OPENMP)
const int batch_size = std::max(static_cast<int>(nsize / this->nthread_ / 32), 1);
const int batch_size = std::max(static_cast<int>(num_features / this->nthread_ / 32), 1);
#endif
int poption = param_.parallel_option;
if (poption == 2) {
poption = static_cast<int>(nsize) * 2 < this->nthread_ ? 1 : 0;
poption = static_cast<int>(num_features) * 2 < this->nthread_ ? 1 : 0;
}
if (poption == 0) {
#pragma omp parallel for schedule(dynamic, batch_size)
for (bst_omp_uint i = 0; i < nsize; ++i) {
const bst_uint fid = batch.col_index[i];
for (bst_omp_uint i = 0; i < num_features; ++i) {
int fid = feat_set[i];
const int tid = omp_get_thread_num();
const ColBatch::Inst c = batch[i];
auto c = batch[fid];
const bool ind = c.length != 0 && c.data[0].fvalue == c.data[c.length - 1].fvalue;
if (param_.NeedForwardSearch(fmat.GetColDensity(fid), ind)) {
this->EnumerateSplit(c.data, c.data + c.length, +1,
@@ -632,8 +633,8 @@ class ColMaker: public TreeUpdater {
}
}
} else {
for (bst_omp_uint i = 0; i < nsize; ++i) {
this->ParallelFindSplit(batch[i], batch.col_index[i],
for (bst_omp_uint fid = 0; fid < num_features; ++fid) {
this->ParallelFindSplit(batch[fid], fid,
fmat, gpair);
}
}
@@ -653,9 +654,9 @@ class ColMaker: public TreeUpdater {
<< "colsample_bylevel cannot be zero.";
feat_set.resize(n);
}
dmlc::DataIter<ColBatch>* iter = p_fmat->ColIterator(feat_set);
auto iter = p_fmat->ColIterator();
while (iter->Next()) {
this->UpdateSolution(iter->Value(), gpair, *p_fmat);
this->UpdateSolution(iter->Value(), feat_set, gpair, *p_fmat);
}
// after this each thread's stemp will get the best candidates, aggregate results
this->SyncBestSolution(qexpand);
@@ -730,12 +731,11 @@ class ColMaker: public TreeUpdater {
}
std::sort(fsplits.begin(), fsplits.end());
fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fsplits);
auto iter = p_fmat->ColIterator();
while (iter->Next()) {
const ColBatch &batch = iter->Value();
for (size_t i = 0; i < batch.size; ++i) {
ColBatch::Inst col = batch[i];
const bst_uint fid = batch.col_index[i];
auto batch = iter->Value();
for (auto fid : fsplits) {
auto col = batch[fid];
const auto ndata = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {
@@ -859,12 +859,11 @@ class DistColMaker : public ColMaker<TStats, TConstraint> {
boolmap_[j] = 0;
}
}
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fsplits);
auto iter = p_fmat->ColIterator();
while (iter->Next()) {
const ColBatch &batch = iter->Value();
for (size_t i = 0; i < batch.size; ++i) {
ColBatch::Inst col = batch[i];
const bst_uint fid = batch.col_index[i];
auto batch = iter->Value();
for (auto fid : fsplits) {
auto col = batch[fid];
const auto ndata = static_cast<bst_omp_uint>(col.length);
#pragma omp parallel for schedule(static)
for (bst_omp_uint j = 0; j < ndata; ++j) {

View File

@@ -661,16 +661,15 @@ class GPUMaker : public TreeUpdater {
// in case you end up with a DMatrix having no column access
// then make sure to enable that before copying the data!
if (!dmat->HaveColAccess(true)) {
const std::vector<bool> enable(nCols, true);
dmat->InitColAccess(enable, 1, nRows, true);
dmat->InitColAccess(nRows, true);
}
dmlc::DataIter<ColBatch>* iter = dmat->ColIterator();
auto iter = dmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
const ColBatch& batch = iter->Value();
for (int i = 0; i < batch.size; i++) {
const ColBatch::Inst& col = batch[i];
for (const ColBatch::Entry* it = col.data; it != col.data + col.length;
auto batch = iter->Value();
for (int i = 0; i < batch.Size(); i++) {
auto col = batch[i];
for (const Entry* it = col.data; it != col.data + col.length;
it++) {
int inst_id = static_cast<int>(it->index);
fval->push_back(it->fvalue);

View File

@@ -250,7 +250,7 @@ __device__ int upper_bound(const float* __restrict__ cuts, int n, float v) {
__global__ void compress_bin_ellpack_k
(common::CompressedBufferWriter wr, common::CompressedByteT* __restrict__ buffer,
const size_t* __restrict__ row_ptrs,
const RowBatch::Entry* __restrict__ entries,
const Entry* __restrict__ entries,
const float* __restrict__ cuts, const size_t* __restrict__ cut_rows,
size_t base_row, size_t n_rows, size_t row_ptr_begin, size_t row_stride,
unsigned int null_gidx_value) {
@@ -261,7 +261,7 @@ __global__ void compress_bin_ellpack_k
int row_size = static_cast<int>(row_ptrs[irow + 1] - row_ptrs[irow]);
unsigned int bin = null_gidx_value;
if (ifeature < row_size) {
RowBatch::Entry entry = entries[row_ptrs[irow] - row_ptr_begin + ifeature];
Entry entry = entries[row_ptrs[irow] - row_ptr_begin + ifeature];
int feature = entry.index;
float fvalue = entry.fvalue;
const float *feature_cuts = &cuts[cut_rows[feature]];
@@ -332,7 +332,7 @@ struct DeviceShard {
param(param),
prediction_cache_initialised(false) {}
void Init(const common::HistCutMatrix& hmat, const RowBatch& row_batch) {
void Init(const common::HistCutMatrix& hmat, const SparsePage& row_batch) {
// copy cuts to the GPU
dh::safe_cuda(cudaSetDevice(device_idx));
thrust::device_vector<float> cuts_d(hmat.cut);
@@ -340,7 +340,7 @@ struct DeviceShard {
// find the maximum row size
thrust::device_vector<size_t> row_ptr_d(
row_batch.ind_ptr + row_begin_idx, row_batch.ind_ptr + row_end_idx + 1);
&row_batch.offset[row_begin_idx], &row_batch.offset[row_end_idx + 1]);
auto row_iter = row_ptr_d.begin();
auto get_size = [=] __device__(size_t row) {
@@ -369,11 +369,11 @@ struct DeviceShard {
// bin and compress entries in batches of rows
// use no more than 1/16th of GPU memory per batch
size_t gpu_batch_nrows = dh::TotalMemory(device_idx) /
(16 * row_stride * sizeof(RowBatch::Entry));
(16 * row_stride * sizeof(Entry));
if (gpu_batch_nrows > n_rows) {
gpu_batch_nrows = n_rows;
}
thrust::device_vector<RowBatch::Entry> entries_d(gpu_batch_nrows * row_stride);
thrust::device_vector<Entry> entries_d(gpu_batch_nrows * row_stride);
size_t gpu_nbatches = dh::DivRoundUp(n_rows, gpu_batch_nrows);
for (size_t gpu_batch = 0; gpu_batch < gpu_nbatches; ++gpu_batch) {
size_t batch_row_begin = gpu_batch * gpu_batch_nrows;
@@ -383,13 +383,13 @@ struct DeviceShard {
}
size_t batch_nrows = batch_row_end - batch_row_begin;
size_t n_entries =
row_batch.ind_ptr[row_begin_idx + batch_row_end] -
row_batch.ind_ptr[row_begin_idx + batch_row_begin];
row_batch.offset[row_begin_idx + batch_row_end] -
row_batch.offset[row_begin_idx + batch_row_begin];
dh::safe_cuda
(cudaMemcpy
(entries_d.data().get(),
&row_batch.data_ptr[row_batch.ind_ptr[row_begin_idx + batch_row_begin]],
n_entries * sizeof(RowBatch::Entry), cudaMemcpyDefault));
&row_batch.data[row_batch.offset[row_begin_idx + batch_row_begin]],
n_entries * sizeof(Entry), cudaMemcpyDefault));
dim3 block3(32, 8, 1);
dim3 grid3(dh::DivRoundUp(n_rows, block3.x),
dh::DivRoundUp(row_stride, block3.y), 1);
@@ -398,7 +398,7 @@ struct DeviceShard {
row_ptr_d.data().get() + batch_row_begin,
entries_d.data().get(), cuts_d.data().get(), cut_row_ptrs_d.data().get(),
batch_row_begin, batch_nrows,
row_batch.ind_ptr[row_begin_idx + batch_row_begin],
row_batch.offset[row_begin_idx + batch_row_begin],
row_stride, null_gidx_value);
dh::safe_cuda(cudaGetLastError());
@@ -702,10 +702,10 @@ class GPUHistMaker : public TreeUpdater {
monitor_.Start("BinningCompression", device_list_);
{
dmlc::DataIter<RowBatch>* iter = dmat->RowIterator();
dmlc::DataIter<SparsePage>* iter = dmat->RowIterator();
iter->BeforeFirst();
CHECK(iter->Next()) << "Empty batches are not supported";
const RowBatch& batch = iter->Value();
const SparsePage& batch = iter->Value();
// Create device shards
dh::ExecuteIndexShards(&shards_, [&](int i, std::unique_ptr<DeviceShard>& shard) {
shard = std::unique_ptr<DeviceShard>

View File

@@ -344,17 +344,18 @@ class CQHistMaker: public HistMaker<TStats> {
{
thread_hist_.resize(omp_get_max_threads());
// start accumulating statistics
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(fset);
auto iter = p_fmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
const ColBatch &batch = iter->Value();
auto batch = iter->Value();
// start enumeration
const auto nsize = static_cast<bst_omp_uint>(batch.size);
const auto nsize = static_cast<bst_omp_uint>(fset.size());
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint i = 0; i < nsize; ++i) {
int offset = feat2workindex_[batch.col_index[i]];
int fid = fset[i];
int offset = feat2workindex_[fid];
if (offset >= 0) {
this->UpdateHistCol(gpair, batch[i], info, tree,
this->UpdateHistCol(gpair, batch[fid], info, tree,
fset, offset,
&thread_hist_[omp_get_thread_num()]);
}
@@ -425,20 +426,20 @@ class CQHistMaker: public HistMaker<TStats> {
work_set_.resize(std::unique(work_set_.begin(), work_set_.end()) - work_set_.begin());
// start accumulating statistics
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(work_set_);
auto iter = p_fmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
const ColBatch &batch = iter->Value();
auto batch = iter->Value();
// TWOPASS: use the real set + split set in the column iteration.
this->CorrectNonDefaultPositionByBatch(batch, fsplit_set_, tree);
// start enumeration
const auto nsize = static_cast<bst_omp_uint>(batch.size);
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint i = 0; i < nsize; ++i) {
int offset = feat2workindex_[batch.col_index[i]];
for (bst_omp_uint fid = 0; fid < nsize; ++fid) {
int offset = feat2workindex_[fid];
if (offset >= 0) {
this->UpdateSketchCol(gpair, batch[i], tree,
this->UpdateSketchCol(gpair, batch[fid], tree,
work_set_size, offset,
&thread_sketch_[omp_get_thread_num()]);
}
@@ -494,7 +495,7 @@ class CQHistMaker: public HistMaker<TStats> {
}
inline void UpdateHistCol(const std::vector<GradientPair> &gpair,
const ColBatch::Inst &c,
const SparsePage::Inst &c,
const MetaInfo &info,
const RegTree &tree,
const std::vector<bst_uint> &fset,
@@ -546,7 +547,7 @@ class CQHistMaker: public HistMaker<TStats> {
}
}
inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
const ColBatch::Inst &c,
const SparsePage::Inst &c,
const RegTree &tree,
size_t work_set_size,
bst_uint offset,
@@ -712,18 +713,18 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
std::unique(this->work_set_.begin(), this->work_set_.end()) - this->work_set_.begin());
// start accumulating statistics
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator(this->work_set_);
auto iter = p_fmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
const ColBatch &batch = iter->Value();
auto batch = iter->Value();
// TWOPASS: use the real set + split set in the column iteration.
this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set_, tree);
// start enumeration
const auto nsize = static_cast<bst_omp_uint>(batch.size);
const auto nsize = static_cast<bst_omp_uint>(this->work_set_.size());
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint i = 0; i < nsize; ++i) {
int offset = this->feat2workindex_[batch.col_index[i]];
int offset = this->feat2workindex_[this->work_set_[i]];
if (offset >= 0) {
this->UpdateHistCol(gpair, batch[i], info, tree,
fset, offset,
@@ -769,19 +770,19 @@ class QuantileHistMaker: public HistMaker<TStats> {
sketchs_[i].Init(info.num_row_, this->param_.sketch_eps);
}
// start accumulating statistics
dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
auto iter = p_fmat->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch &batch = iter->Value();
auto batch = iter->Value();
// parallel convert to column major format
common::ParallelGroupBuilder<SparseBatch::Entry>
common::ParallelGroupBuilder<Entry>
builder(&col_ptr_, &col_data_, &thread_col_ptr_);
builder.InitBudget(tree.param.num_feature, nthread);
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.size);
const bst_omp_uint nbatch = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nbatch; ++i) {
RowBatch::Inst inst = batch[i];
SparsePage::Inst inst = batch[i];
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
int nid = this->position_[ridx];
if (nid >= 0) {
@@ -800,13 +801,13 @@ class QuantileHistMaker: public HistMaker<TStats> {
builder.InitStorage();
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nbatch; ++i) {
RowBatch::Inst inst = batch[i];
SparsePage::Inst inst = batch[i];
const bst_uint ridx = static_cast<bst_uint>(batch.base_rowid + i);
const int nid = this->position_[ridx];
if (nid >= 0) {
for (bst_uint j = 0; j < inst.length; ++j) {
builder.Push(inst[j].index,
SparseBatch::Entry(nid, inst[j].fvalue),
Entry(nid, inst[j].fvalue),
omp_get_thread_num());
}
}
@@ -816,7 +817,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint k = 0; k < nfeat; ++k) {
for (size_t i = col_ptr_[k]; i < col_ptr_[k+1]; ++i) {
const SparseBatch::Entry &e = col_data_[i];
const Entry &e = col_data_[i];
const int wid = this->node2workindex_[e.index];
sketchs_[wid * tree.param.num_feature + k].Push(e.fvalue, gpair[e.index].GetHess());
}
@@ -873,7 +874,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
// local temp column data structure
std::vector<size_t> col_ptr_;
// local storage of column data
std::vector<SparseBatch::Entry> col_data_;
std::vector<Entry> col_data_;
std::vector<std::vector<size_t> > thread_col_ptr_;
// per node, per feature sketch
std::vector<common::WQuantileSketch<bst_float, bst_float> > sketchs_;

View File

@@ -57,15 +57,15 @@ class TreeRefresher: public TreeUpdater {
{
const MetaInfo &info = p_fmat->Info();
// start accumulating statistics
dmlc::DataIter<RowBatch> *iter = p_fmat->RowIterator();
auto *iter = p_fmat->RowIterator();
iter->BeforeFirst();
while (iter->Next()) {
const RowBatch &batch = iter->Value();
CHECK_LT(batch.size, std::numeric_limits<unsigned>::max());
const auto nbatch = static_cast<bst_omp_uint>(batch.size);
auto batch = iter->Value();
CHECK_LT(batch.Size(), std::numeric_limits<unsigned>::max());
const auto nbatch = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(static)
for (bst_omp_uint i = 0; i < nbatch; ++i) {
RowBatch::Inst inst = batch[i];
SparsePage::Inst inst = batch[i];
const int tid = omp_get_thread_num();
const auto ridx = static_cast<bst_uint>(batch.base_rowid + i);
RegTree::FVec &feats = fvec_temp[tid];

View File

@@ -144,18 +144,18 @@ class SketchMaker: public BaseMaker {
// number of rows in
const size_t nrows = p_fmat->BufferedRowset().Size();
// start accumulating statistics
dmlc::DataIter<ColBatch> *iter = p_fmat->ColIterator();
auto iter = p_fmat->ColIterator();
iter->BeforeFirst();
while (iter->Next()) {
const ColBatch &batch = iter->Value();
auto batch = iter->Value();
// start enumeration
const auto nsize = static_cast<bst_omp_uint>(batch.size);
const auto nsize = static_cast<bst_omp_uint>(batch.Size());
#pragma omp parallel for schedule(dynamic, 1)
for (bst_omp_uint i = 0; i < nsize; ++i) {
this->UpdateSketchCol(gpair, batch[i], tree,
for (bst_omp_uint fidx = 0; fidx < nsize; ++fidx) {
this->UpdateSketchCol(gpair, batch[fidx], tree,
node_stats_,
batch.col_index[i],
batch[i].length == nrows,
fidx,
batch[fidx].length == nrows,
&thread_sketch_[omp_get_thread_num()]);
}
}
@@ -174,7 +174,7 @@ class SketchMaker: public BaseMaker {
}
// update sketch information in column fid
inline void UpdateSketchCol(const std::vector<GradientPair> &gpair,
const ColBatch::Inst &c,
const SparsePage::Inst &c,
const RegTree &tree,
const std::vector<SKStats> &nstats,
bst_uint fid,