Dmatrix refactor stage 1 (#3301)

* Use sparse page as singular CSR matrix representation

* Simplify dmatrix methods

* Reduce statefullness of batch iterators

* BREAKING CHANGE: Remove prob_buffer_row parameter. Users are instead recommended to sample their dataset as a preprocessing step before using XGBoost.
This commit is contained in:
Rory Mitchell
2018-06-07 10:25:58 +12:00
committed by GitHub
parent 286dccb8e8
commit a96039141a
47 changed files with 650 additions and 1036 deletions

View File

@@ -18,13 +18,13 @@ TEST(SimpleCSRSource, SaveLoadBinary) {
EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
EXPECT_EQ(dmat->Info().num_row_, dmat_read->Info().num_row_);
dmlc::DataIter<xgboost::RowBatch> * row_iter = dmat->RowIterator();
dmlc::DataIter<xgboost::RowBatch> * row_iter_read = dmat_read->RowIterator();
auto row_iter = dmat->RowIterator();
auto row_iter_read = dmat_read->RowIterator();
// Test the data read into the first row
row_iter->BeforeFirst(); row_iter->Next();
row_iter_read->BeforeFirst(); row_iter_read->Next();
xgboost::SparseBatch::Inst first_row = row_iter->Value()[0];
xgboost::SparseBatch::Inst first_row_read = row_iter_read->Value()[0];
auto first_row = row_iter->Value()[0];
auto first_row_read = row_iter_read->Value()[0];
EXPECT_EQ(first_row.length, first_row_read.length);
EXPECT_EQ(first_row[2].index, first_row_read[2].index);
EXPECT_EQ(first_row[2].fvalue, first_row_read[2].fvalue);