Rewrite sparse dmatrix using callbacks. (#7092)
- Reduce dependency on dmlc parsers and provide an interface for users to load data by themselves.
- Remove use of threaded iterator and IO queue.
- Remove `page_size`.
- Make sure the number of pages in memory is bounded.
- Make sure the cache cannot be violated.
- Provide an interface for internal algorithms to process data asynchronously.
This commit is contained in:
@@ -125,12 +125,10 @@ TEST(DenseColumnWithMissing, Test) {
|
||||
}
|
||||
|
||||
void TestGHistIndexMatrixCreation(size_t nthreads) {
|
||||
dmlc::TemporaryDirectory tmpdir;
|
||||
std::string filename = tmpdir.path + "/big.libsvm";
|
||||
size_t constexpr kPageSize = 1024, kEntriesPerCol = 3;
|
||||
size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
|
||||
/* This should create multiple sparse pages */
|
||||
std::unique_ptr<DMatrix> dmat{ CreateSparsePageDMatrix(kEntries, kPageSize, filename) };
|
||||
std::unique_ptr<DMatrix> dmat{ CreateSparsePageDMatrix(kEntries) };
|
||||
omp_set_num_threads(nthreads);
|
||||
GHistIndexMatrix gmat(dmat.get(), 256);
|
||||
}
|
||||
|
||||
@@ -83,7 +83,7 @@ inline std::shared_ptr<DMatrix> GetExternalMemoryDMatrixFromData(
|
||||
}
|
||||
fo.close();
|
||||
return std::shared_ptr<DMatrix>(DMatrix::Load(
|
||||
tmp_file + "#" + tmp_file + ".cache", true, false, "auto", page_size));
|
||||
tmp_file + "#" + tmp_file + ".cache", true, false, "auto"));
|
||||
}
|
||||
|
||||
// Test that elements are approximately equally distributed among bins
|
||||
|
||||
Reference in New Issue
Block a user