Rewrite sparse dmatrix using callbacks. (#7092)

- Reduce dependency on dmlc parsers and provide an interface for users to load data by themselves.
- Remove use of threaded iterator and IO queue.
- Remove `page_size`.
- Make sure the number of pages in memory is bounded.
- Make sure the cache cannot be violated.
- Provide an interface for internal algorithms to process data asynchronously.
2021-07-16 12:33:31 +08:00
parent 2f524e9f41
commit bd1f3a38f0
51 changed files with 1445 additions and 1391 deletions
--- a/tests/cpp/helpers.cu
+++ b/tests/cpp/helpers.cu
@@ -8,16 +8,16 @@ namespace xgboost {

 CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,
                                           size_t cols, size_t batches)
-    : rows_{rows}, cols_{cols}, n_batches_{batches} {
-  XGProxyDMatrixCreate(&proxy_);
-  rng_.reset(new RandomDataGenerator{rows_, cols_, sparsity});
+    : ArrayIterForTest{sparsity, rows, cols, batches} {
  rng_->Device(0);
  std::tie(batches_, interface_) =
      rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
  this->Reset();
 }

-CudaArrayIterForTest::~CudaArrayIterForTest() { XGDMatrixFree(proxy_); }
+size_t constexpr CudaArrayIterForTest::kRows;
+size_t constexpr CudaArrayIterForTest::kCols;
+size_t constexpr CudaArrayIterForTest::kBatches;

 int CudaArrayIterForTest::Next() {
  if (iter_ == n_batches_) {
@@ -28,8 +28,6 @@ int CudaArrayIterForTest::Next() {
  return 1;
 }

-size_t constexpr CudaArrayIterForTest::kRows;
-size_t constexpr CudaArrayIterForTest::kCols;

 std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDeviceDMatrix(bool with_label,
                                                                    bool float_label,