Rewrite sparse dmatrix using callbacks. (#7092)

- Reduce dependency on dmlc parsers and provide an interface for users to load data themselves.
- Remove use of threaded iterator and IO queue.
- Remove `page_size`.
- Make sure the number of pages in memory is bounded.
- Make sure the cache cannot be violated.
- Provide an interface for internal algorithms to process data asynchronously.
This commit is contained in:
Jiaming Yuan
2021-07-16 12:33:31 +08:00
committed by GitHub
parent 2f524e9f41
commit bd1f3a38f0
51 changed files with 1445 additions and 1391 deletions

View File

@@ -27,7 +27,7 @@ void VerifySampling(size_t page_size,
}
gpair.SetDevice(0);
BatchParam param{0, 256, page_size};
BatchParam param{0, 256};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
if (page_size != 0) {
EXPECT_NE(page->n_rows, kRows);
@@ -82,7 +82,7 @@ TEST(GradientBasedSampler, NoSamplingExternalMemory) {
auto gpair = GenerateRandomGradients(kRows);
gpair.SetDevice(0);
BatchParam param{0, 256, kPageSize};
BatchParam param{0, 256};
auto page = (*dmat->GetBatches<EllpackPage>(param).begin()).Impl();
EXPECT_NE(page->n_rows, kRows);