Deterministic data partitioning for external memory (#6317)

* Make external memory data partitioning deterministic.

* Change the meaning of `page_size` from bytes to number of rows.

* Design a data pool.

* Note for external memory.

* Enable unity build on Windows CI.

* Force garbage collection in tests.
This commit is contained in:
Jiaming Yuan
2020-11-11 06:11:06 +08:00
committed by GitHub
parent 9564886d9f
commit 43efadea2e
15 changed files with 334 additions and 88 deletions

View File

@@ -830,9 +830,10 @@ void SparsePage::Push(const SparsePage &batch) {
const auto& batch_data_vec = batch.data.HostVector();
size_t top = offset_vec.back();
data_vec.resize(top + batch.data.Size());
-std::memcpy(dmlc::BeginPtr(data_vec) + top,
-dmlc::BeginPtr(batch_data_vec),
-sizeof(Entry) * batch.data.Size());
+if (dmlc::BeginPtr(data_vec) && dmlc::BeginPtr(batch_data_vec)) {
+std::memcpy(dmlc::BeginPtr(data_vec) + top, dmlc::BeginPtr(batch_data_vec),
+sizeof(Entry) * batch.data.Size());
+}
size_t begin = offset.Size();
offset_vec.resize(begin + batch.Size());
for (size_t i = 0; i < batch.Size(); ++i) {