Refactor DMatrix to return batches of different page types (#4686)
* Use explicit template parameter for specifying page type.
This commit is contained in:
@@ -63,7 +63,7 @@ TEST(SparsePage, PushCSCAfterTranspose) {
|
||||
CreateSparsePageDMatrix(n_entries, 64UL, filename);
|
||||
const int ncols = dmat->Info().num_col_;
|
||||
SparsePage page; // Consolidated sparse page
|
||||
for (const auto &batch : dmat->GetRowBatches()) {
|
||||
for (const auto &batch : dmat->GetBatches<xgboost::SparsePage>()) {
|
||||
// Transpose each batch and push
|
||||
SparsePage tmp = batch.GetTranspose(ncols);
|
||||
page.PushCSC(tmp);
|
||||
|
||||
@@ -122,7 +122,7 @@ TEST(MetaInfo, LoadQid) {
|
||||
xgboost::Entry(2, 0), xgboost::Entry(3, 0), xgboost::Entry(4, 0.4),
|
||||
xgboost::Entry(5, 1), xgboost::Entry(1, 0), xgboost::Entry(2, 1),
|
||||
xgboost::Entry(3, 1), xgboost::Entry(4, 0.5), {5, 0}};
|
||||
for (const auto &batch : dmat->GetRowBatches()) {
|
||||
for (const auto &batch : dmat->GetBatches<xgboost::SparsePage>()) {
|
||||
CHECK_EQ(batch.base_rowid, 0);
|
||||
CHECK(batch.offset.HostVector() == expected_offset);
|
||||
CHECK(batch.data.HostVector() == expected_data);
|
||||
|
||||
@@ -20,10 +20,10 @@ TEST(SimpleCSRSource, SaveLoadBinary) {
|
||||
EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
|
||||
|
||||
// Test we have non-empty batch
|
||||
EXPECT_EQ(dmat->GetRowBatches().begin().AtEnd(), false);
|
||||
EXPECT_EQ(dmat->GetBatches<xgboost::SparsePage>().begin().AtEnd(), false);
|
||||
|
||||
auto row_iter = dmat->GetRowBatches().begin();
|
||||
auto row_iter_read = dmat_read->GetRowBatches().begin();
|
||||
auto row_iter = dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
auto row_iter_read = dmat_read->GetBatches<xgboost::SparsePage>().begin();
|
||||
// Test the data read into the first row
|
||||
auto first_row = (*row_iter)[0];
|
||||
auto first_row_read = (*row_iter_read)[0];
|
||||
|
||||
@@ -28,12 +28,12 @@ TEST(SimpleDMatrix, RowAccess) {
|
||||
|
||||
// Loop over the batches and count the records
|
||||
int64_t row_count = 0;
|
||||
for (auto &batch : dmat->GetRowBatches()) {
|
||||
for (auto &batch : dmat->GetBatches<xgboost::SparsePage>()) {
|
||||
row_count += batch.Size();
|
||||
}
|
||||
EXPECT_EQ(row_count, dmat->Info().num_row_);
|
||||
// Test the data read into the first row
|
||||
auto &batch = *dmat->GetRowBatches().begin();
|
||||
auto &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
auto first_row = batch[0];
|
||||
ASSERT_EQ(first_row.size(), 3);
|
||||
EXPECT_EQ(first_row[2].index, 2);
|
||||
@@ -55,7 +55,7 @@ TEST(SimpleDMatrix, ColAccessWithoutBatches) {
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
int64_t num_col_batch = 0;
|
||||
for (const auto &batch : dmat->GetSortedColumnBatches()) {
|
||||
for (const auto &batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
|
||||
num_col_batch += 1;
|
||||
EXPECT_EQ(batch.Size(), dmat->Info().num_col_)
|
||||
<< "Expected batch size = number of cells as #batches is 1.";
|
||||
|
||||
@@ -33,7 +33,7 @@ TEST(SparsePageDMatrix, RowAccess) {
|
||||
xgboost::CreateSparsePageDMatrix(12, 64, filename);
|
||||
|
||||
// Test the data read into the first row
|
||||
auto &batch = *dmat->GetRowBatches().begin();
|
||||
auto &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
auto first_row = batch[0];
|
||||
ASSERT_EQ(first_row.size(), 3);
|
||||
EXPECT_EQ(first_row[2].index, 2);
|
||||
@@ -51,14 +51,14 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
EXPECT_EQ(dmat->GetColDensity(1), 0.5);
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
for (auto col_batch : dmat->GetSortedColumnBatches()) {
|
||||
for (auto col_batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
|
||||
EXPECT_EQ(col_batch.Size(), dmat->Info().num_col_);
|
||||
EXPECT_EQ(col_batch[1][0].fvalue, 10.0f);
|
||||
EXPECT_EQ(col_batch[1].size(), 1);
|
||||
}
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
for (auto col_batch : dmat->GetColumnBatches()) {
|
||||
for (auto col_batch : dmat->GetBatches<xgboost::CSCPage>()) {
|
||||
EXPECT_EQ(col_batch.Size(), dmat->Info().num_col_);
|
||||
EXPECT_EQ(col_batch[1][0].fvalue, 10.0f);
|
||||
EXPECT_EQ(col_batch[1].size(), 1);
|
||||
@@ -82,7 +82,7 @@ TEST(SparsePageDMatrix, ColAccessBatches) {
|
||||
};
|
||||
auto n_threads = omp_get_max_threads();
|
||||
omp_set_num_threads(16);
|
||||
for (auto const& page : dmat->GetColumnBatches()) {
|
||||
for (auto const& page : dmat->GetBatches<xgboost::CSCPage>()) {
|
||||
ASSERT_EQ(dmat->Info().num_col_, page.Size());
|
||||
}
|
||||
omp_set_num_threads(n_threads);
|
||||
|
||||
Reference in New Issue
Block a user