Rewrite sparse dmatrix using callbacks. (#7092)

- Reduce dependency on dmlc parsers and provide an interface for users to load data by themselves.
- Remove use of threaded iterator and IO queue.
- Remove `page_size`.
- Make sure the number of pages in memory is bounded.
- Make sure the cache can not be violated.
- Provide an interface for internal algorithms to process data asynchronously.
This commit is contained in:
Jiaming Yuan
2021-07-16 12:33:31 +08:00
committed by GitHub
parent 2f524e9f41
commit bd1f3a38f0
51 changed files with 1445 additions and 1391 deletions

View File

@@ -98,8 +98,7 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT
const std::string tmp_file = tempdir.path + "/big.libsvm";
CreateBigTestData(tmp_file, 50000);
std::shared_ptr<DMatrix> dmat(xgboost::DMatrix::Load(
tmp_file + "#" + tmp_file + ".cache", true, false, "auto", 100));
EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));
tmp_file + "#" + tmp_file + ".cache", true, false, "auto"));
EXPECT_FALSE(dmat->SingleColBlock());
size_t num_row = dmat->Info().num_row_;
std::vector<bst_float> labels(num_row);