[BLOCKING] Handle empty rows in data iterators correctly (#5929)
* [jvm-packages] Handle empty rows in data iterators correctly * Fix clang-tidy error * last empty row * Add comments [skip ci] Co-authored-by: Nan Zhu <nanzhu@uber.com>
This commit is contained in:
committed by
GitHub
parent
a4de2f68e4
commit
487ab0ce73
@@ -26,12 +26,13 @@ TEST(Adapter, CSRAdapter) {
|
||||
EXPECT_EQ(line0.GetElement(1).value, 2);
|
||||
|
||||
auto line1 = batch.GetLine(1);
|
||||
EXPECT_EQ(line1 .GetElement(0).value, 3);
|
||||
EXPECT_EQ(line1 .GetElement(1).value, 4);
|
||||
EXPECT_EQ(line1.GetElement(0).value, 3);
|
||||
EXPECT_EQ(line1.GetElement(1).value, 4);
|
||||
|
||||
auto line2 = batch.GetLine(2);
|
||||
EXPECT_EQ(line2 .GetElement(0).value, 5);
|
||||
EXPECT_EQ(line2 .GetElement(0).row_idx, 2);
|
||||
EXPECT_EQ(line2 .GetElement(0).column_idx, 1);
|
||||
EXPECT_EQ(line2.GetElement(0).value, 5);
|
||||
EXPECT_EQ(line2.GetElement(0).row_idx, 2);
|
||||
EXPECT_EQ(line2.GetElement(0).column_idx, 1);
|
||||
}
|
||||
|
||||
TEST(Adapter, CSCAdapterColsMoreThanRows) {
|
||||
@@ -73,10 +74,11 @@ class CSRIterForTest {
|
||||
std::vector<std::remove_pointer<decltype(std::declval<XGBoostBatchCSR>().index)>::type>
|
||||
feature_idx_ {0, 1, 0, 1, 1};
|
||||
std::vector<std::remove_pointer<decltype(std::declval<XGBoostBatchCSR>().offset)>::type>
|
||||
row_ptr_ {0, 2, 4, 5};
|
||||
row_ptr_ {0, 2, 4, 5, 5};
|
||||
size_t iter_ {0};
|
||||
|
||||
public:
|
||||
size_t static constexpr kRows { 4 }; // Test for the last row being empty
|
||||
size_t static constexpr kCols { 13 }; // Test for having some missing columns
|
||||
|
||||
XGBoostBatchCSR Next() {
|
||||
@@ -88,7 +90,7 @@ class CSRIterForTest {
|
||||
batch.offset = dmlc::BeginPtr(row_ptr_);
|
||||
batch.index = dmlc::BeginPtr(feature_idx_);
|
||||
batch.value = dmlc::BeginPtr(data_);
|
||||
batch.size = 3;
|
||||
batch.size = kRows;
|
||||
|
||||
batch.label = nullptr;
|
||||
batch.weight = nullptr;
|
||||
@@ -117,16 +119,23 @@ int CSRSetDataNextForTest(DataIterHandle data_handle,
|
||||
}
|
||||
}
|
||||
|
||||
TEST(Adapter, IteratorAdaper) {
|
||||
TEST(Adapter, IteratorAdapter) {
|
||||
CSRIterForTest iter;
|
||||
data::IteratorAdapter<DataIterHandle, XGBCallbackDataIterNext,
|
||||
XGBoostBatchCSR> adapter{&iter, CSRSetDataNextForTest};
|
||||
constexpr size_t kRows { 6 };
|
||||
constexpr size_t kRows { 8 };
|
||||
|
||||
std::unique_ptr<DMatrix> data {
|
||||
DMatrix::Create(&adapter, std::numeric_limits<float>::quiet_NaN(), 1)
|
||||
};
|
||||
ASSERT_EQ(data->Info().num_col_, CSRIterForTest::kCols);
|
||||
ASSERT_EQ(data->Info().num_row_, kRows);
|
||||
int num_batch = 0;
|
||||
for (auto const& batch : data->GetBatches<SparsePage>()) {
|
||||
ASSERT_EQ(batch.offset.HostVector(), std::vector<bst_row_t>({0, 2, 4, 5, 5, 7, 9, 10, 10}));
|
||||
++num_batch;
|
||||
}
|
||||
ASSERT_EQ(num_batch, 1);
|
||||
}
|
||||
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user