Remove accidental SparsePage copies (#3583)
Parent: 0b607fb884
Commit: 645996b12f
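Every hunk below makes the same one-line change: `auto batch = iter->Value();` becomes `auto &batch = iter->Value();`. Because `auto` deduction drops reference qualifiers, the old form deep-copied the SparsePage returned by the iterator on every batch; the reference form merely aliases iterator-owned storage. A minimal sketch of the pitfall, using a stand-in Page type and a hypothetical Iter::Value() accessor (names invented here for illustration, not taken from the commit):

#include <iostream>
#include <vector>

// Stand-in for a SparsePage-like type: cheap to reference, expensive to copy.
struct Page {
  std::vector<int> data;
  Page() = default;
  Page(const Page &other) : data(other.data) {
    std::cout << "copied " << data.size() << " entries\n";  // the hidden cost
  }
};

// Mirrors the iterator shape assumed by the diff: Value() returns a
// reference to storage owned by the iterator itself.
struct Iter {
  Page page;
  const Page &Value() const { return page; }
};

int main() {
  Iter iter;
  iter.page.data.resize(1000000);

  auto by_value = iter.Value();   // auto deduces Page: deep copy (prints)
  auto &by_ref  = iter.Value();   // auto& deduces const Page&: no copy

  std::cout << by_value.data.size() << ' ' << by_ref.data.size() << '\n';
  return 0;
}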
@@ -36,7 +36,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
   auto iter = p_fmat->RowIterator();
   iter->BeforeFirst();
   while (iter->Next()) {
-    auto batch = iter->Value();
+    auto &batch = iter->Value();
 #pragma omp parallel num_threads(nthread)
     {
       CHECK_EQ(nthread, omp_get_num_threads());

@@ -137,7 +137,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
   iter->BeforeFirst();
   row_ptr.push_back(0);
   while (iter->Next()) {
-    auto batch = iter->Value();
+    auto &batch = iter->Value();
     const size_t rbegin = row_ptr.size() - 1;
     for (size_t i = 0; i < batch.Size(); ++i) {
       row_ptr.push_back(batch[i].length + row_ptr.back());

@@ -67,7 +67,7 @@ void SimpleDMatrix::MakeOneBatch(SparsePage* pcol, bool sorted) {

   iter->BeforeFirst();
   while (iter->Next()) {
-    auto batch = iter->Value();
+    auto &batch = iter->Value();
 #pragma omp parallel for schedule(static)
     for (long i = 0; i < static_cast<long>(batch.Size()); ++i) { // NOLINT(*)
       int tid = omp_get_thread_num();

@@ -185,7 +185,7 @@ void SparsePageDMatrix::InitColAccess(

   while (true) {
     if (batch_ptr != batch_top) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       CHECK_EQ(batch_top, batch.Size());
       for (size_t i = batch_ptr; i < batch_top; ++i) {
         auto ridx = static_cast<bst_uint>(batch.base_rowid + i);

@@ -155,7 +155,7 @@ class GBLinear : public GradientBooster {
     auto iter = p_fmat->RowIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // parallel over local batch
       const auto nsize = static_cast<bst_omp_uint>(batch.Size());
 #pragma omp parallel for schedule(static)

@@ -207,7 +207,7 @@ class GBLinear : public GradientBooster {
     const int ngroup = model_.param.num_output_group;
     preds.resize(p_fmat->Info().num_row_ * ngroup);
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // output convention: nrow * k, where nrow is number of rows
       // k is number of group
       // parallel over local batch

@@ -441,7 +441,7 @@ class Dart : public GBTree {
     auto* self = static_cast<Derived*>(this);
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // parallel over local batch
       constexpr int kUnroll = 8;
       const auto nsize = static_cast<bst_omp_uint>(batch.Size());

@@ -67,7 +67,7 @@ inline std::pair<double, double> GetGradient(int group_idx, int num_group, int f
   double sum_grad = 0.0, sum_hess = 0.0;
   auto iter = p_fmat->ColIterator();
   while (iter->Next()) {
-    auto batch = iter->Value();
+    auto &batch = iter->Value();
     auto col = batch[fidx];
     const auto ndata = static_cast<bst_omp_uint>(col.length);
     for (bst_omp_uint j = 0; j < ndata; ++j) {

@@ -98,7 +98,7 @@ inline std::pair<double, double> GetGradientParallel(int group_idx, int num_grou
   double sum_grad = 0.0, sum_hess = 0.0;
   auto iter = p_fmat->ColIterator();
   while (iter->Next()) {
-    auto batch = iter->Value();
+    auto &batch = iter->Value();
     auto col = batch[fidx];
     const auto ndata = static_cast<bst_omp_uint>(col.length);
 #pragma omp parallel for schedule(static) reduction(+ : sum_grad, sum_hess)

@@ -156,7 +156,7 @@ inline void UpdateResidualParallel(int fidx, int group_idx, int num_group,
   if (dw == 0.0f) return;
   auto iter = p_fmat->ColIterator();
   while (iter->Next()) {
-    auto batch = iter->Value();
+    auto &batch = iter->Value();
     auto col = batch[fidx];
     // update grad value
     const auto num_row = static_cast<bst_omp_uint>(col.length);

@@ -327,7 +327,7 @@ class GreedyFeatureSelector : public FeatureSelector {
     std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
     auto iter = p_fmat->ColIterator();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
 #pragma omp parallel for schedule(static)
       for (bst_omp_uint i = 0; i < nfeat; ++i) {
         const auto col = batch[i];

@@ -394,7 +394,7 @@ class ThriftyFeatureSelector : public FeatureSelector {
     std::fill(gpair_sums_.begin(), gpair_sums_.end(), std::make_pair(0., 0.));
     auto iter = p_fmat->ColIterator();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // column-parallel is usually faster than row-parallel
 #pragma omp parallel for schedule(static)
       for (bst_omp_uint i = 0; i < nfeat; ++i) {

@@ -237,7 +237,7 @@ class GPUCoordinateUpdater : public LinearUpdater {
     auto iter = p_fmat->ColIterator();
     CHECK(p_fmat->SingleColBlock());
     iter->Next();
-    auto batch = iter->Value();
+    auto &batch = iter->Value();

     shards.resize(n_devices);
     // Create device shards

@@ -81,7 +81,7 @@ class ShotgunUpdater : public LinearUpdater {
                            param_.reg_alpha_denorm, param_.reg_lambda_denorm, 0);
     auto iter = p_fmat->ColIterator();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       const auto nfeat = static_cast<bst_omp_uint>(batch.Size());
 #pragma omp parallel for schedule(static)
       for (bst_omp_uint i = 0; i < nfeat; ++i) {

@@ -236,7 +236,7 @@ class CPUPredictor : public Predictor {
     auto iter = p_fmat->RowIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // parallel over local batch
       const auto nsize = static_cast<bst_omp_uint>(batch.Size());
 #pragma omp parallel for schedule(static)

@@ -285,7 +285,7 @@ class CPUPredictor : public Predictor {
     const std::vector<bst_float>& base_margin = info.base_margin_;
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // parallel over local batch
       const auto nsize = static_cast<bst_omp_uint>(batch.Size());
 #pragma omp parallel for schedule(static)

@@ -64,7 +64,7 @@ struct DeviceMatrix {
     iter->BeforeFirst();
     size_t data_offset = 0;
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // Copy row ptr
       dh::safe_cuda(cudaMemcpy(
           row_ptr.Data() + batch.base_rowid, batch.offset.data(),

@@ -46,7 +46,7 @@ class BaseMaker: public TreeUpdater {
     auto iter = p_fmat->ColIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       for (bst_uint fid = 0; fid < batch.Size(); ++fid) {
         auto c = batch[fid];
         if (c.length != 0) {

@@ -305,7 +305,7 @@ class BaseMaker: public TreeUpdater {
     this->GetSplitSet(nodes, tree, &fsplits);
     auto iter = p_fmat->ColIterator();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       for (auto fid : fsplits) {
         auto col = batch[fid];
         const auto ndata = static_cast<bst_omp_uint>(col.length);

@@ -731,7 +731,7 @@ class ColMaker: public TreeUpdater {
     fsplits.resize(std::unique(fsplits.begin(), fsplits.end()) - fsplits.begin());
     auto iter = p_fmat->ColIterator();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       for (auto fid : fsplits) {
         auto col = batch[fid];
         const auto ndata = static_cast<bst_omp_uint>(col.length);

@@ -862,7 +862,7 @@ class DistColMaker : public ColMaker {
     }
     auto iter = p_fmat->ColIterator();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       for (auto fid : fsplits) {
         auto col = batch[fid];
         const auto ndata = static_cast<bst_omp_uint>(col.length);

@@ -666,7 +666,7 @@ class GPUMaker : public TreeUpdater {
     auto iter = dmat->ColIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       for (int i = 0; i < batch.Size(); i++) {
         auto col = batch[i];
         for (const Entry* it = col.data; it != col.data + col.length;

@@ -347,7 +347,7 @@ class CQHistMaker: public HistMaker<TStats> {
     auto iter = p_fmat->ColIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // start enumeration
       const auto nsize = static_cast<bst_omp_uint>(fset.size());
 #pragma omp parallel for schedule(dynamic, 1)

@@ -429,7 +429,7 @@ class CQHistMaker: public HistMaker<TStats> {
     auto iter = p_fmat->ColIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // TWOPASS: use the real set + split set in the column iteration.
       this->CorrectNonDefaultPositionByBatch(batch, fsplit_set_, tree);

@@ -717,7 +717,7 @@ class GlobalProposalHistMaker: public CQHistMaker<TStats> {
     auto iter = p_fmat->ColIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // TWOPASS: use the real set + split set in the column iteration.
       this->CorrectNonDefaultPositionByBatch(batch, this->fsplit_set_, tree);

@@ -775,7 +775,7 @@ class QuantileHistMaker: public HistMaker<TStats> {
     auto iter = p_fmat->RowIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // parallel convert to column major format
       common::ParallelGroupBuilder<Entry>
           builder(&col_ptr_, &col_data_, &thread_col_ptr_);

@@ -60,7 +60,7 @@ class TreeRefresher: public TreeUpdater {
     auto *iter = p_fmat->RowIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       CHECK_LT(batch.Size(), std::numeric_limits<unsigned>::max());
       const auto nbatch = static_cast<bst_omp_uint>(batch.Size());
 #pragma omp parallel for schedule(static)

@@ -147,7 +147,7 @@ class SketchMaker: public BaseMaker {
     auto iter = p_fmat->ColIterator();
     iter->BeforeFirst();
     while (iter->Next()) {
-      auto batch = iter->Value();
+      auto &batch = iter->Value();
       // start enumeration
       const auto nsize = static_cast<bst_omp_uint>(batch.Size());
 #pragma omp parallel for schedule(dynamic, 1)

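A design note, not part of the change: one way to keep such copies from creeping back in is to delete the copy constructor of the page type, so any `auto page = iter->Value();` fails to compile instead of silently allocating. Sketched below with a hypothetical BigPage type (the real SparsePage in this commit remains copyable):

#include <vector>

// Hypothetical non-copyable page: accidental copies become compile errors.
struct BigPage {
  std::vector<int> data;
  BigPage() = default;
  BigPage(const BigPage&) = delete;             // forbid implicit copies
  BigPage& operator=(const BigPage&) = delete;
  BigPage(BigPage&&) = default;                 // moves stay cheap and legal
};

struct PageIter {
  BigPage page;
  const BigPage& Value() const { return page; }
};

int main() {
  PageIter iter;
  // auto copy = iter.Value();   // would not compile: copy ctor is deleted
  auto &ref = iter.Value();      // OK: binds a reference, no copy
  return static_cast<int>(ref.data.size());
}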