Rewrite sparse dmatrix using callbacks. (#7092)
- Reduce dependency on dmlc parsers and provide an interface for users to load data by themselves.
- Remove use of threaded iterator and IO queue.
- Remove `page_size`.
- Make sure the number of pages in memory is bounded.
- Make sure the cache cannot be violated.
- Provide an interface for internal algorithms to process data asynchronously.
This commit is contained in:
@@ -8,16 +8,16 @@ namespace xgboost {
|
||||
|
||||
CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,
|
||||
size_t cols, size_t batches)
|
||||
: rows_{rows}, cols_{cols}, n_batches_{batches} {
|
||||
XGProxyDMatrixCreate(&proxy_);
|
||||
rng_.reset(new RandomDataGenerator{rows_, cols_, sparsity});
|
||||
: ArrayIterForTest{sparsity, rows, cols, batches} {
|
||||
rng_->Device(0);
|
||||
std::tie(batches_, interface_) =
|
||||
rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
|
||||
this->Reset();
|
||||
}
|
||||
|
||||
CudaArrayIterForTest::~CudaArrayIterForTest() { XGDMatrixFree(proxy_); }
|
||||
size_t constexpr CudaArrayIterForTest::kRows;
|
||||
size_t constexpr CudaArrayIterForTest::kCols;
|
||||
size_t constexpr CudaArrayIterForTest::kBatches;
|
||||
|
||||
int CudaArrayIterForTest::Next() {
|
||||
if (iter_ == n_batches_) {
|
||||
@@ -28,8 +28,6 @@ int CudaArrayIterForTest::Next() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t constexpr CudaArrayIterForTest::kRows;
|
||||
size_t constexpr CudaArrayIterForTest::kCols;
|
||||
|
||||
std::shared_ptr<DMatrix> RandomDataGenerator::GenerateDeviceDMatrix(bool with_label,
|
||||
bool float_label,
|
||||
|
||||
Reference in New Issue
Block a user