Refactor DMatrix to return batches of different page types (#4686)

* Use explicit template parameter for specifying page type.
This commit is contained in:
Rong Ou
2019-08-03 12:10:34 -07:00
committed by Jiaming Yuan
parent e930a8e54f
commit 6edddd7966
41 changed files with 477 additions and 470 deletions

View File

@@ -165,7 +165,7 @@ void SparseCuts::Build(DMatrix* dmat, uint32_t const max_num_bins) {
sparse_cuts[i].reset(new SparseCuts(&cuts_containers[i]));
}
for (auto const& page : dmat->GetColumnBatches()) {
for (auto const& page : dmat->GetBatches<CSCPage>()) {
CHECK_LE(page.Size(), dmat->Info().num_col_);
monitor_.Start("Load balance");
std::vector<size_t> col_ptr = LoadBalance(page, nthreads);
@@ -247,7 +247,7 @@ void DenseCuts::Build(DMatrix* p_fmat, uint32_t max_num_bins) {
// Use group index for weights?
bool const use_group = UseGroup(p_fmat);
for (const auto &batch : p_fmat->GetRowBatches()) {
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
size_t group_ind = 0;
if (use_group) {
group_ind = this->SearchGroupIndFromRow(group_ptr, batch.base_rowid);
@@ -336,7 +336,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
size_t new_size = 1;
for (const auto &batch : p_fmat->GetRowBatches()) {
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
new_size += batch.Size();
}
@@ -346,7 +346,7 @@ void GHistIndexMatrix::Init(DMatrix* p_fmat, int max_num_bins) {
size_t rbegin = 0;
size_t prev_sum = 0;
for (const auto &batch : p_fmat->GetRowBatches()) {
for (const auto &batch : p_fmat->GetBatches<SparsePage>()) {
// The number of threads is pegged to the batch size. If the OMP
// block is parallelized on anything other than the batch/block size,
// it should be reassigned