Use view for SparsePage exclusively. (#6590)
This commit is contained in:
@@ -47,7 +47,8 @@ TEST(Adapter, CSCAdapterColsMoreThanRows) {
|
||||
EXPECT_EQ(dmat.Info().num_nonzero_, 8);
|
||||
|
||||
auto &batch = *dmat.GetBatches<SparsePage>().begin();
|
||||
auto inst = batch[0];
|
||||
auto page = batch.GetView();
|
||||
auto inst = page[0];
|
||||
EXPECT_EQ(inst[0].fvalue, 1);
|
||||
EXPECT_EQ(inst[0].index, 0);
|
||||
EXPECT_EQ(inst[1].fvalue, 3);
|
||||
@@ -57,7 +58,7 @@ TEST(Adapter, CSCAdapterColsMoreThanRows) {
|
||||
EXPECT_EQ(inst[3].fvalue, 7);
|
||||
EXPECT_EQ(inst[3].index, 3);
|
||||
|
||||
inst = batch[1];
|
||||
inst = page[1];
|
||||
EXPECT_EQ(inst[0].fvalue, 2);
|
||||
EXPECT_EQ(inst[0].index, 0);
|
||||
EXPECT_EQ(inst[1].fvalue, 4);
|
||||
|
||||
@@ -11,9 +11,9 @@ namespace xgboost {
|
||||
TEST(SparsePage, PushCSC) {
|
||||
std::vector<bst_row_t> offset {0};
|
||||
std::vector<Entry> data;
|
||||
SparsePage page;
|
||||
page.offset.HostVector() = offset;
|
||||
page.data.HostVector() = data;
|
||||
SparsePage batch;
|
||||
batch.offset.HostVector() = offset;
|
||||
batch.data.HostVector() = data;
|
||||
|
||||
offset = {0, 1, 4};
|
||||
for (size_t i = 0; i < offset.back(); ++i) {
|
||||
@@ -24,25 +24,26 @@ TEST(SparsePage, PushCSC) {
|
||||
other.offset.HostVector() = offset;
|
||||
other.data.HostVector() = data;
|
||||
|
||||
page.PushCSC(other);
|
||||
batch.PushCSC(other);
|
||||
|
||||
ASSERT_EQ(page.offset.HostVector().size(), offset.size());
|
||||
ASSERT_EQ(page.data.HostVector().size(), data.size());
|
||||
ASSERT_EQ(batch.offset.HostVector().size(), offset.size());
|
||||
ASSERT_EQ(batch.data.HostVector().size(), data.size());
|
||||
for (size_t i = 0; i < offset.size(); ++i) {
|
||||
ASSERT_EQ(page.offset.HostVector()[i], offset[i]);
|
||||
ASSERT_EQ(batch.offset.HostVector()[i], offset[i]);
|
||||
}
|
||||
for (size_t i = 0; i < data.size(); ++i) {
|
||||
ASSERT_EQ(page.data.HostVector()[i].index, data[i].index);
|
||||
ASSERT_EQ(batch.data.HostVector()[i].index, data[i].index);
|
||||
}
|
||||
|
||||
page.PushCSC(other);
|
||||
ASSERT_EQ(page.offset.HostVector().size(), offset.size());
|
||||
ASSERT_EQ(page.data.Size(), data.size() * 2);
|
||||
batch.PushCSC(other);
|
||||
ASSERT_EQ(batch.offset.HostVector().size(), offset.size());
|
||||
ASSERT_EQ(batch.data.Size(), data.size() * 2);
|
||||
|
||||
for (size_t i = 0; i < offset.size(); ++i) {
|
||||
ASSERT_EQ(page.offset.HostVector()[i], offset[i] * 2);
|
||||
ASSERT_EQ(batch.offset.HostVector()[i], offset[i] * 2);
|
||||
}
|
||||
|
||||
auto page = batch.GetView();
|
||||
auto inst = page[0];
|
||||
ASSERT_EQ(inst.size(), 2ul);
|
||||
for (auto entry : inst) {
|
||||
@@ -78,7 +79,7 @@ TEST(SparsePage, PushCSCAfterTranspose) {
|
||||
// The feature value for a feature in each row should be identical, as that is
|
||||
// how the dmatrix has been created
|
||||
for (size_t i = 0; i < page.Size(); ++i) {
|
||||
auto inst = page[i];
|
||||
auto inst = page.GetView()[i];
|
||||
for (size_t j = 1; j < inst.size(); ++j) {
|
||||
ASSERT_EQ(inst[0].fvalue, inst[j].fvalue);
|
||||
}
|
||||
|
||||
@@ -39,7 +39,8 @@ TEST(SimpleDMatrix, RowAccess) {
|
||||
EXPECT_EQ(row_count, dmat->Info().num_row_);
|
||||
// Test the data read into the first row
|
||||
auto &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
auto first_row = batch[0];
|
||||
auto page = batch.GetView();
|
||||
auto first_row = page[0];
|
||||
ASSERT_EQ(first_row.size(), 3);
|
||||
EXPECT_EQ(first_row[2].index, 2);
|
||||
EXPECT_EQ(first_row[2].fvalue, 20);
|
||||
@@ -143,8 +144,9 @@ TEST(SimpleDMatrix, FromDense) {
|
||||
EXPECT_EQ(dmat.Info().num_nonzero_, 6);
|
||||
|
||||
for (auto &batch : dmat.GetBatches<SparsePage>()) {
|
||||
auto page = batch.GetView();
|
||||
for (auto i = 0ull; i < batch.Size(); i++) {
|
||||
auto inst = batch[i];
|
||||
auto inst = page[i];
|
||||
for (auto j = 0ull; j < inst.size(); j++) {
|
||||
EXPECT_EQ(inst[j].fvalue, data[i * n + j]);
|
||||
EXPECT_EQ(inst[j].index, j);
|
||||
@@ -165,19 +167,20 @@ TEST(SimpleDMatrix, FromCSC) {
|
||||
EXPECT_EQ(dmat.Info().num_nonzero_, 5);
|
||||
|
||||
auto &batch = *dmat.GetBatches<SparsePage>().begin();
|
||||
auto inst = batch[0];
|
||||
auto page = batch.GetView();
|
||||
auto inst = page[0];
|
||||
EXPECT_EQ(inst[0].fvalue, 1);
|
||||
EXPECT_EQ(inst[0].index, 0);
|
||||
EXPECT_EQ(inst[1].fvalue, 2);
|
||||
EXPECT_EQ(inst[1].index, 1);
|
||||
|
||||
inst = batch[1];
|
||||
inst = page[1];
|
||||
EXPECT_EQ(inst[0].fvalue, 3);
|
||||
EXPECT_EQ(inst[0].index, 0);
|
||||
EXPECT_EQ(inst[1].fvalue, 4);
|
||||
EXPECT_EQ(inst[1].index, 1);
|
||||
|
||||
inst = batch[2];
|
||||
inst = page[2];
|
||||
EXPECT_EQ(inst[0].fvalue, 5);
|
||||
EXPECT_EQ(inst[0].index, 1);
|
||||
}
|
||||
@@ -194,11 +197,12 @@ TEST(SimpleDMatrix, FromFile) {
|
||||
std::unique_ptr<dmlc::Parser<uint32_t>> parser(
|
||||
dmlc::Parser<uint32_t>::Create(filename.c_str(), 0, 1, "auto"));
|
||||
|
||||
auto verify_batch = [kExpectedNumRow](SparsePage const &batch) {
|
||||
auto verify_batch = [kExpectedNumRow](SparsePage const &page) {
|
||||
auto batch = page.GetView();
|
||||
EXPECT_EQ(batch.Size(), kExpectedNumRow);
|
||||
EXPECT_EQ(batch.offset.HostVector(),
|
||||
EXPECT_EQ(page.offset.HostVector(),
|
||||
std::vector<bst_row_t>({0, 3, 6, 9, 12, 15, 15}));
|
||||
EXPECT_EQ(batch.base_rowid, 0);
|
||||
EXPECT_EQ(page.base_rowid, 0);
|
||||
|
||||
for (auto i = 0ull; i < batch.Size() - 1; i++) {
|
||||
if (i % 2 == 0) {
|
||||
@@ -251,8 +255,10 @@ TEST(SimpleDMatrix, Slice) {
|
||||
ASSERT_EQ(out->Info().labels_upper_bound_.Size(), ridxs.size());
|
||||
ASSERT_EQ(out->Info().base_margin_.Size(), ridxs.size() * kClasses);
|
||||
|
||||
for (auto const& in_page : p_m->GetBatches<SparsePage>()) {
|
||||
for (auto const &out_page : out->GetBatches<SparsePage>()) {
|
||||
for (auto const& in_batch : p_m->GetBatches<SparsePage>()) {
|
||||
auto in_page = in_batch.GetView();
|
||||
for (auto const &out_batch : out->GetBatches<SparsePage>()) {
|
||||
auto out_page = out_batch.GetView();
|
||||
for (size_t i = 0; i < ridxs.size(); ++i) {
|
||||
auto ridx = ridxs[i];
|
||||
auto out_inst = out_page[i];
|
||||
@@ -305,8 +311,8 @@ TEST(SimpleDMatrix, SaveLoadBinary) {
|
||||
auto row_iter = dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
auto row_iter_read = dmat_read->GetBatches<xgboost::SparsePage>().begin();
|
||||
// Test the data read into the first row
|
||||
auto first_row = (*row_iter)[0];
|
||||
auto first_row_read = (*row_iter_read)[0];
|
||||
auto first_row = (*row_iter).GetView()[0];
|
||||
auto first_row_read = (*row_iter_read).GetView()[0];
|
||||
EXPECT_EQ(first_row.size(), first_row_read.size());
|
||||
EXPECT_EQ(first_row[2].index, first_row_read[2].index);
|
||||
EXPECT_EQ(first_row[2].fvalue, first_row_read[2].fvalue);
|
||||
|
||||
@@ -35,8 +35,9 @@ TEST(SimpleDMatrix, FromColumnarDenseBasic) {
|
||||
|
||||
void TestDenseColumn(DMatrix* dmat, size_t n_rows, size_t n_cols) {
|
||||
for (auto& batch : dmat->GetBatches<SparsePage>()) {
|
||||
auto page = batch.GetView();
|
||||
for (auto i = 0ull; i < batch.Size(); i++) {
|
||||
auto inst = batch[i];
|
||||
auto inst = page[i];
|
||||
for (auto j = 0ull; j < inst.size(); j++) {
|
||||
EXPECT_EQ(inst[j].fvalue, i * 2);
|
||||
EXPECT_EQ(inst[j].index, j);
|
||||
@@ -162,8 +163,9 @@ TEST(SimpleDMatrix, FromColumnarWithEmptyRows) {
|
||||
-1);
|
||||
|
||||
for (auto& batch : dmat.GetBatches<SparsePage>()) {
|
||||
auto page = batch.GetView();
|
||||
for (auto i = 0ull; i < batch.Size(); i++) {
|
||||
auto inst = batch[i];
|
||||
auto inst = page[i];
|
||||
for (auto j = 0ull; j < inst.size(); j++) {
|
||||
EXPECT_EQ(inst[j].fvalue, i);
|
||||
EXPECT_EQ(inst[j].index, j);
|
||||
@@ -257,8 +259,9 @@ TEST(SimpleCSRSource, FromColumnarSparse) {
|
||||
data::CudfAdapter adapter(str);
|
||||
data::SimpleDMatrix dmat(&adapter, 2.0, -1);
|
||||
for (auto& batch : dmat.GetBatches<SparsePage>()) {
|
||||
auto page = batch.GetView();
|
||||
for (auto i = 0ull; i < batch.Size(); i++) {
|
||||
auto inst = batch[i];
|
||||
auto inst = page[i];
|
||||
for (auto e : inst) {
|
||||
ASSERT_NE(e.fvalue, 2.0);
|
||||
}
|
||||
@@ -304,8 +307,9 @@ TEST(SimpleDMatrix, FromColumnarSparseBasic) {
|
||||
EXPECT_EQ(dmat.Info().num_nonzero_, 32);
|
||||
|
||||
for (auto& batch : dmat.GetBatches<SparsePage>()) {
|
||||
auto page = batch.GetView();
|
||||
for (auto i = 0ull; i < batch.Size(); i++) {
|
||||
auto inst = batch[i];
|
||||
auto inst = page[i];
|
||||
for (auto j = 0ull; j < inst.size(); j++) {
|
||||
EXPECT_EQ(inst[j].fvalue, i * 2);
|
||||
EXPECT_EQ(inst[j].index, j);
|
||||
@@ -329,8 +333,9 @@ TEST(SimpleDMatrix, FromCupy){
|
||||
EXPECT_EQ(dmat.Info().num_nonzero_, rows*cols);
|
||||
|
||||
for (auto& batch : dmat.GetBatches<SparsePage>()) {
|
||||
auto page = batch.GetView();
|
||||
for (auto i = 0ull; i < batch.Size(); i++) {
|
||||
auto inst = batch[i];
|
||||
auto inst = page[i];
|
||||
for (auto j = 0ull; j < inst.size(); j++) {
|
||||
EXPECT_EQ(inst[j].fvalue, i * cols + j);
|
||||
EXPECT_EQ(inst[j].index, j);
|
||||
@@ -354,12 +359,14 @@ TEST(SimpleDMatrix, FromCupySparse){
|
||||
EXPECT_EQ(dmat.Info().num_row_, rows);
|
||||
EXPECT_EQ(dmat.Info().num_nonzero_, rows * cols - 2);
|
||||
auto& batch = *dmat.GetBatches<SparsePage>().begin();
|
||||
auto inst0 = batch[0];
|
||||
auto inst1 = batch[1];
|
||||
EXPECT_EQ(batch[0].size(), 1);
|
||||
EXPECT_EQ(batch[1].size(), 1);
|
||||
EXPECT_EQ(batch[0][0].fvalue, 0.0f);
|
||||
EXPECT_EQ(batch[0][0].index, 0);
|
||||
EXPECT_EQ(batch[1][0].fvalue, 3.0f);
|
||||
EXPECT_EQ(batch[1][0].index, 1);
|
||||
auto page = batch.GetView();
|
||||
|
||||
auto inst0 = page[0];
|
||||
auto inst1 = page[1];
|
||||
EXPECT_EQ(page[0].size(), 1);
|
||||
EXPECT_EQ(page[1].size(), 1);
|
||||
EXPECT_EQ(page[0][0].fvalue, 0.0f);
|
||||
EXPECT_EQ(page[0][0].index, 0);
|
||||
EXPECT_EQ(page[1][0].fvalue, 3.0f);
|
||||
EXPECT_EQ(page[1][0].index, 1);
|
||||
}
|
||||
|
||||
@@ -39,7 +39,8 @@ TEST(SparsePageDMatrix, RowAccess) {
|
||||
|
||||
// Test the data read into the first row
|
||||
auto &batch = *dmat->GetBatches<xgboost::SparsePage>().begin();
|
||||
auto first_row = batch[0];
|
||||
auto page = batch.GetView();
|
||||
auto first_row = page[0];
|
||||
ASSERT_EQ(first_row.size(), 3ul);
|
||||
EXPECT_EQ(first_row[2].index, 2u);
|
||||
EXPECT_EQ(first_row[2].fvalue, 20);
|
||||
@@ -54,16 +55,18 @@ TEST(SparsePageDMatrix, ColAccess) {
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
for (auto const &col_batch : dmat->GetBatches<xgboost::SortedCSCPage>()) {
|
||||
EXPECT_EQ(col_batch.Size(), dmat->Info().num_col_);
|
||||
EXPECT_EQ(col_batch[1][0].fvalue, 10.0f);
|
||||
EXPECT_EQ(col_batch[1].size(), 1);
|
||||
auto col_page = col_batch.GetView();
|
||||
EXPECT_EQ(col_page.Size(), dmat->Info().num_col_);
|
||||
EXPECT_EQ(col_page[1][0].fvalue, 10.0f);
|
||||
EXPECT_EQ(col_page[1].size(), 1);
|
||||
}
|
||||
|
||||
// Loop over the batches and assert the data is as expected
|
||||
for (auto const &col_batch : dmat->GetBatches<xgboost::CSCPage>()) {
|
||||
EXPECT_EQ(col_batch.Size(), dmat->Info().num_col_);
|
||||
EXPECT_EQ(col_batch[1][0].fvalue, 10.0f);
|
||||
EXPECT_EQ(col_batch[1].size(), 1);
|
||||
auto col_page = col_batch.GetView();
|
||||
EXPECT_EQ(col_page.Size(), dmat->Info().num_col_);
|
||||
EXPECT_EQ(col_page[1][0].fvalue, 10.0f);
|
||||
EXPECT_EQ(col_page[1].size(), 1);
|
||||
}
|
||||
|
||||
EXPECT_TRUE(FileExists(tmp_file + ".cache"));
|
||||
@@ -238,8 +241,9 @@ TEST(SparsePageDMatrix, FromDense) {
|
||||
EXPECT_EQ(dmat.Info().num_nonzero_, 6);
|
||||
|
||||
for (auto &batch : dmat.GetBatches<SparsePage>()) {
|
||||
auto page = batch.GetView();
|
||||
for (auto i = 0ull; i < batch.Size(); i++) {
|
||||
auto inst = batch[i];
|
||||
auto inst = page[i];
|
||||
for (auto j = 0ull; j < inst.size(); j++) {
|
||||
EXPECT_EQ(inst[j].fvalue, data[i * n + j]);
|
||||
EXPECT_EQ(inst[j].index, j);
|
||||
@@ -262,19 +266,20 @@ TEST(SparsePageDMatrix, FromCSC) {
|
||||
EXPECT_EQ(dmat.Info().num_nonzero_, 5);
|
||||
|
||||
auto &batch = *dmat.GetBatches<SparsePage>().begin();
|
||||
auto inst = batch[0];
|
||||
auto page = batch.GetView();
|
||||
auto inst = page[0];
|
||||
EXPECT_EQ(inst[0].fvalue, 1);
|
||||
EXPECT_EQ(inst[0].index, 0);
|
||||
EXPECT_EQ(inst[1].fvalue, 2);
|
||||
EXPECT_EQ(inst[1].index, 1);
|
||||
|
||||
inst = batch[1];
|
||||
inst = page[1];
|
||||
EXPECT_EQ(inst[0].fvalue, 3);
|
||||
EXPECT_EQ(inst[0].index, 0);
|
||||
EXPECT_EQ(inst[1].fvalue, 4);
|
||||
EXPECT_EQ(inst[1].index, 1);
|
||||
|
||||
inst = batch[2];
|
||||
inst = page[2];
|
||||
EXPECT_EQ(inst[0].fvalue, 5);
|
||||
EXPECT_EQ(inst[0].index, 1);
|
||||
}
|
||||
@@ -294,19 +299,20 @@ TEST(SparsePageDMatrix, FromFile) {
|
||||
|
||||
for (auto &batch : dmat.GetBatches<SparsePage>()) {
|
||||
std::vector<bst_row_t> expected_offset(batch.Size() + 1);
|
||||
auto page = batch.GetView();
|
||||
int n = -3;
|
||||
std::generate(expected_offset.begin(), expected_offset.end(),
|
||||
[&n] { return n += 3; });
|
||||
EXPECT_EQ(batch.offset.HostVector(), expected_offset);
|
||||
|
||||
if (batch.base_rowid % 2 == 0) {
|
||||
EXPECT_EQ(batch[0][0].index, 0);
|
||||
EXPECT_EQ(batch[0][1].index, 1);
|
||||
EXPECT_EQ(batch[0][2].index, 2);
|
||||
EXPECT_EQ(page[0][0].index, 0);
|
||||
EXPECT_EQ(page[0][1].index, 1);
|
||||
EXPECT_EQ(page[0][2].index, 2);
|
||||
} else {
|
||||
EXPECT_EQ(batch[0][0].index, 0);
|
||||
EXPECT_EQ(batch[0][1].index, 3);
|
||||
EXPECT_EQ(batch[0][2].index, 4);
|
||||
EXPECT_EQ(page[0][0].index, 0);
|
||||
EXPECT_EQ(page[0][1].index, 3);
|
||||
EXPECT_EQ(page[0][2].index, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user