Group builder modified for incremental building (#5098)

This commit is contained in:
Rory Mitchell
2019-12-10 14:33:56 +13:00
committed by GitHub
parent 1cb6bcc382
commit 979f74d51a
5 changed files with 126 additions and 30 deletions

View File

@@ -35,7 +35,8 @@ TEST(group_data, ParallelGroupBuilder) {
EXPECT_EQ(offsets, expected_offsets);
// Create new builder, add one more row given already populated offsets/data
ParallelGroupBuilder<Entry, size_t> builder2(&offsets, &data);
ParallelGroupBuilder<Entry, size_t> builder2(&offsets, &data,
offsets.size() - 1);
builder2.InitBudget(0, 1);
builder2.AddBudget(2, 0, 2);
builder2.InitStorage();

View File

@@ -30,7 +30,7 @@ TEST(c_api, CSRAdapter) {
EXPECT_EQ(line2 .GetElement(0).row_idx, 2);
EXPECT_EQ(line2 .GetElement(0).column_idx, 1);
data::SimpleDMatrix dmat(&adapter, -1, std::nan(""));
data::SimpleDMatrix dmat(&adapter, std::nan(""), -1);
EXPECT_EQ(dmat.Info().num_col_, 2);
EXPECT_EQ(dmat.Info().num_row_, 3);
EXPECT_EQ(dmat.Info().num_nonzero_, 5);
@@ -51,7 +51,7 @@ TEST(c_api, DenseAdapter) {
int n = 2;
std::vector<float> data = {1, 2, 3, 4, 5, 6};
data::DenseAdapter adapter(data.data(), m, m*n, n);
data::SimpleDMatrix dmat(&adapter,-1,std::numeric_limits<float>::quiet_NaN());
data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(), -1);
EXPECT_EQ(dmat.Info().num_col_, 2);
EXPECT_EQ(dmat.Info().num_row_, 3);
EXPECT_EQ(dmat.Info().num_nonzero_, 6);
@@ -73,7 +73,7 @@ TEST(c_api, CSCAdapter) {
std::vector<unsigned> row_idx = {0, 1, 0, 1, 2};
std::vector<size_t> col_ptr = {0, 2, 5};
data::CSCAdapter adapter(col_ptr.data(), row_idx.data(), data.data(), 2, 3);
data::SimpleDMatrix dmat(&adapter,-1,std::numeric_limits<float>::quiet_NaN());
data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(), -1);
EXPECT_EQ(dmat.Info().num_col_, 2);
EXPECT_EQ(dmat.Info().num_row_, 3);
EXPECT_EQ(dmat.Info().num_nonzero_, 5);
@@ -96,6 +96,39 @@ TEST(c_api, CSCAdapter) {
EXPECT_EQ(inst[0].index, 1);
}
// A CSC input with more columns (4) than rows (2). The row count is passed as
// 0 so that it has to be inferred from the row indices.
TEST(c_api, CSCAdapterColsMoreThanRows) {
  std::vector<float> values = {1, 2, 3, 4, 5, 6, 7, 8};
  std::vector<unsigned> row_indices = {0, 1, 0, 1, 0, 1, 0, 1};
  std::vector<size_t> column_ptr = {0, 2, 4, 6, 8};
  // Infer row count
  data::CSCAdapter adapter(column_ptr.data(), row_indices.data(),
                           values.data(), 4, 0);
  data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),
                           -1);
  EXPECT_EQ(dmat.Info().num_col_, 4);
  EXPECT_EQ(dmat.Info().num_row_, 2);
  EXPECT_EQ(dmat.Info().num_nonzero_, 8);

  auto &page = *dmat.GetBatches<SparsePage>().begin();
  // Every column stores exactly two entries (row 0 then row 1), so the value
  // expected at (row, col) sits at column_ptr[col] + row in the CSC buffer.
  for (size_t row = 0; row < 2; ++row) {
    auto inst = page[row];
    for (size_t col = 0; col < 4; ++col) {
      EXPECT_EQ(inst[col].fvalue, values[column_ptr[col] + row]);
      EXPECT_EQ(inst[col].index, col);
    }
  }
}
TEST(c_api, FileAdapter) {
std::string filename = "test.libsvm";
CreateBigTestData(filename, 10);

View File

@@ -126,3 +126,32 @@ TEST(SimpleDMatrix, EmptyRow) {
CHECK_EQ(dmat.Info().num_row_, 2);
CHECK_EQ(dmat.Info().num_col_, 2);
}
// Builds a SimpleDMatrix from a generated libsvm file through FileAdapter and
// checks the resulting page layout: 5 rows of 3 entries each, with feature
// indices alternating between {0, 1, 2} and {0, 3, 4} by row parity.
TEST(SimpleDMatrix, FromFile) {
  std::string path = "test.libsvm";
  CreateBigTestData(path, 3 * 5);
  std::unique_ptr<dmlc::Parser<uint32_t>> parser(
      dmlc::Parser<uint32_t>::Create(path.c_str(), 0, 1, "auto"));
  data::FileAdapter adapter(parser.get());
  data::SimpleDMatrix dmat(&adapter, std::numeric_limits<float>::quiet_NaN(),
                           1);

  const std::vector<bst_row_t> expected_offsets{0, 3, 6, 9, 12, 15};
  for (auto &page : dmat.GetBatches<SparsePage>()) {
    EXPECT_EQ(page.Size(), 5);
    EXPECT_EQ(page.offset.HostVector(), expected_offsets);
    EXPECT_EQ(page.base_rowid, 0);

    for (auto row = 0ull; row < page.Size(); ++row) {
      // Even rows are expected to hold features 0, 1, 2; odd rows 0, 3, 4.
      const bool is_even_row = (row % 2 == 0);
      EXPECT_EQ(page[row][0].index, 0);
      EXPECT_EQ(page[row][1].index, is_even_row ? 1 : 3);
      EXPECT_EQ(page[row][2].index, is_even_row ? 2 : 4);
    }
  }
}