diff --git a/src/common/hist_util.cc b/src/common/hist_util.cc index 4324a74d6..b32ba349c 100644 --- a/src/common/hist_util.cc +++ b/src/common/hist_util.cc @@ -36,7 +36,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) { auto iter = p_fmat->RowIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); #pragma omp parallel num_threads(nthread) { CHECK_EQ(nthread, omp_get_num_threads()); @@ -137,7 +137,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) { iter->BeforeFirst(); row_ptr.push_back(0); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); const size_t rbegin = row_ptr.size() - 1; for (size_t i = 0; i < batch.Size(); ++i) { row_ptr.push_back(batch[i].length + row_ptr.back()); diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index 05c026476..59cc32da3 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -67,7 +67,7 @@ void SimpleDMatrix::MakeOneBatch(SparsePage* pcol, bool sorted) { iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); #pragma omp parallel for schedule(static) for (long i = 0; i < static_cast(batch.Size()); ++i) { // NOLINT(*) int tid = omp_get_thread_num(); diff --git a/src/data/sparse_page_dmatrix.cc b/src/data/sparse_page_dmatrix.cc index ec2bd2b67..55e078d84 100644 --- a/src/data/sparse_page_dmatrix.cc +++ b/src/data/sparse_page_dmatrix.cc @@ -185,7 +185,7 @@ void SparsePageDMatrix::InitColAccess( while (true) { if (batch_ptr != batch_top) { - auto batch = iter->Value(); + auto &batch = iter->Value(); CHECK_EQ(batch_top, batch.Size()); for (size_t i = batch_ptr; i < batch_top; ++i) { auto ridx = static_cast(batch.base_rowid + i); diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc index ed13bb71c..7f6d424ac 100644 --- a/src/gbm/gblinear.cc +++ b/src/gbm/gblinear.cc @@ -155,7 +155,7 @@ class GBLinear : public GradientBooster { auto iter = p_fmat->RowIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // parallel over local batch const auto nsize = static_cast(batch.Size()); #pragma omp parallel for schedule(static) @@ -207,7 +207,7 @@ class GBLinear : public GradientBooster { const int ngroup = model_.param.num_output_group; preds.resize(p_fmat->Info().num_row_ * ngroup); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // output convention: nrow * k, where nrow is number of rows // k is number of group // parallel over local batch diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index a619114d8..739acde3b 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -441,7 +441,7 @@ class Dart : public GBTree { auto* self = static_cast(this); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // parallel over local batch constexpr int kUnroll = 8; const auto nsize = static_cast(batch.Size()); diff --git a/src/linear/coordinate_common.h b/src/linear/coordinate_common.h index 04d078d4d..18d87fbf1 100644 --- a/src/linear/coordinate_common.h +++ b/src/linear/coordinate_common.h @@ -67,7 +67,7 @@ inline std::pair GetGradient(int group_idx, int num_group, int f double sum_grad = 0.0, sum_hess = 0.0; auto iter = p_fmat->ColIterator(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); auto col = batch[fidx]; const auto ndata = static_cast(col.length); for (bst_omp_uint j = 0; j < ndata; ++j) { @@ -98,7 +98,7 @@ inline std::pair GetGradientParallel(int group_idx, int num_grou double sum_grad = 0.0, sum_hess = 0.0; auto iter = p_fmat->ColIterator(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); auto col = batch[fidx]; const auto ndata = static_cast(col.length); #pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess) @@ -156,7 +156,7 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group, if (dw == 0.0f) return; auto iter = p_fmat->ColIterator(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); auto col = batch[fidx]; // update grad value const auto num_row = static_cast(col.length); @@ -327,7 +327,7 @@ class GreedyFeatureSelector : public FeatureSelector { std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.)); auto iter = p_fmat->ColIterator(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nfeat; ++i) { const auto col = batch[i]; @@ -394,7 +394,7 @@ class ThriftyFeatureSelector : public FeatureSelector { std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.)); auto iter = p_fmat->ColIterator(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // column-parallel is usually faster than row-parallel #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nfeat; ++i) { diff --git a/src/linear/updater_gpu_coordinate.cu b/src/linear/updater_gpu_coordinate.cu index 672fbd1ff..cf4e47c61 100644 --- a/src/linear/updater_gpu_coordinate.cu +++ b/src/linear/updater_gpu_coordinate.cu @@ -237,7 +237,7 @@ class GPUCoordinateUpdater : public LinearUpdater { auto iter = p_fmat->ColIterator(); CHECK(p_fmat->SingleColBlock()); iter->Next(); - auto batch = iter->Value(); + auto &batch = iter->Value(); shards.resize(n_devices); // Create device shards diff --git a/src/linear/updater_shotgun.cc b/src/linear/updater_shotgun.cc index f2e21c5f6..11b91cbce 100644 --- a/src/linear/updater_shotgun.cc +++ b/src/linear/updater_shotgun.cc @@ -81,7 +81,7 @@ class ShotgunUpdater : public LinearUpdater { param_.reg_alpha_denorm, param_.reg_lambda_denorm, 0); auto iter = p_fmat->ColIterator(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); const auto nfeat = static_cast(batch.Size()); #pragma omp parallel for schedule(static) for (bst_omp_uint i = 0; i < nfeat; ++i) { diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 58a95a9e0..964bbaa0d 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -236,7 +236,7 @@ class CPUPredictor : public Predictor { auto iter = p_fmat->RowIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // parallel over local batch const auto nsize = static_cast(batch.Size()); #pragma omp parallel for schedule(static) @@ -285,7 +285,7 @@ class CPUPredictor : public Predictor { const std::vector& base_margin = info.base_margin_; iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // parallel over local batch const auto nsize = static_cast(batch.Size()); #pragma omp parallel for schedule(static) diff --git a/src/predictor/gpu_predictor.cu b/src/predictor/gpu_predictor.cu index 9e576adb4..21afc37f3 100644 --- a/src/predictor/gpu_predictor.cu +++ b/src/predictor/gpu_predictor.cu @@ -64,7 +64,7 @@ struct DeviceMatrix { iter->BeforeFirst(); size_t data_offset = 0; while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // Copy row ptr dh::safe_cuda(cudaMemcpy( row_ptr.Data() + batch.base_rowid, batch.offset.data(), diff --git a/src/tree/updater_basemaker-inl.h b/src/tree/updater_basemaker-inl.h index c2b92e31d..06ca474ba 100644 --- a/src/tree/updater_basemaker-inl.h +++ b/src/tree/updater_basemaker-inl.h @@ -46,7 +46,7 @@ class BaseMaker: public TreeUpdater { auto iter = p_fmat->ColIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); for (bst_uint fid = 0; fid < batch.Size(); ++fid) { auto c = batch[fid]; if (c.length != 0) { @@ -305,7 +305,7 @@ class BaseMaker: public TreeUpdater { this->GetSplitSet(nodes, tree, &fsplits); auto iter = p_fmat->ColIterator(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); for (auto fid : fsplits) { auto col = batch[fid]; const auto ndata = static_cast(col.length); diff --git a/src/tree/updater_colmaker.cc b/src/tree/updater_colmaker.cc index df9a9a453..a87d96b65 100644 --- a/src/tree/updater_colmaker.cc +++ b/src/tree/updater_colmaker.cc @@ -731,7 +731,7 @@ class ColMaker: public TreeUpdater { fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin()); auto iter = p_fmat->ColIterator(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); for (auto fid : fsplits) { auto col = batch[fid]; const auto ndata = static_cast(col.length); @@ -862,7 +862,7 @@ class DistColMaker : public ColMaker { } auto iter = p_fmat->ColIterator(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); for (auto fid : fsplits) { auto col = batch[fid]; const auto ndata = static_cast(col.length); diff --git a/src/tree/updater_gpu.cu b/src/tree/updater_gpu.cu index 4d87572eb..616c75179 100644 --- a/src/tree/updater_gpu.cu +++ b/src/tree/updater_gpu.cu @@ -666,7 +666,7 @@ class GPUMaker : public TreeUpdater { auto iter = dmat->ColIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); for (int i = 0; i < batch.Size(); i++) { auto col = batch[i]; for (const Entry* it = col.data; it != col.data + col.length; diff --git a/src/tree/updater_histmaker.cc b/src/tree/updater_histmaker.cc index 638017355..97d03359a 100644 --- a/src/tree/updater_histmaker.cc +++ b/src/tree/updater_histmaker.cc @@ -347,7 +347,7 @@ class CQHistMaker: public HistMaker { auto iter = p_fmat->ColIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // start enumeration const auto nsize = static_cast(fset.size()); #pragma omp parallel for schedule(dynamic, 1) @@ -429,7 +429,7 @@ class CQHistMaker: public HistMaker { auto iter = p_fmat->ColIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // TWOPASS: use the real set + split set in the column iteration. this->CorrectNonDefaultPositionByBatch(batch, fsplit_set_, tree); @@ -717,7 +717,7 @@ class GlobalProposalHistMaker: public CQHistMaker { auto iter = p_fmat->ColIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // TWOPASS: use the real set + split set in the column iteration. this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set_, tree); @@ -775,7 +775,7 @@ class QuantileHistMaker: public HistMaker { auto iter = p_fmat->RowIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // parallel convert to column major format common::ParallelGroupBuilder builder(&col_ptr_, &col_data_, &thread_col_ptr_); diff --git a/src/tree/updater_refresh.cc b/src/tree/updater_refresh.cc index c365c6a96..b14fa248d 100644 --- a/src/tree/updater_refresh.cc +++ b/src/tree/updater_refresh.cc @@ -60,7 +60,7 @@ class TreeRefresher: public TreeUpdater { auto *iter = p_fmat->RowIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); CHECK_LT(batch.Size(), std::numeric_limits::max()); const auto nbatch = static_cast(batch.Size()); #pragma omp parallel for schedule(static) diff --git a/src/tree/updater_skmaker.cc b/src/tree/updater_skmaker.cc index 8040bb2fd..9549ff0c6 100644 --- a/src/tree/updater_skmaker.cc +++ b/src/tree/updater_skmaker.cc @@ -147,7 +147,7 @@ class SketchMaker: public BaseMaker { auto iter = p_fmat->ColIterator(); iter->BeforeFirst(); while (iter->Next()) { - auto batch = iter->Value(); + auto &batch = iter->Value(); // start enumeration const auto nsize = static_cast(batch.Size()); #pragma omp parallel for schedule(dynamic, 1)