Deterministic data partitioning for external memory (#6317)
* Make external memory data partitioning deterministic. * Change the meaning of `page_size` from bytes to number of rows. * Design a data pool. * Add a note for external memory. * Enable unity build on Windows CI. * Force garbage collection in tests.
This commit is contained in:
@@ -373,12 +373,8 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrix(
|
||||
batch_count++;
|
||||
row_count += batch.Size();
|
||||
}
|
||||
#if defined(_OPENMP)
|
||||
EXPECT_GE(batch_count, 2);
|
||||
EXPECT_EQ(row_count, dmat->Info().num_row_);
|
||||
#else
|
||||
#warning "External memory doesn't work with Non-OpenMP build "
|
||||
#endif // defined(_OPENMP)
|
||||
return dmat;
|
||||
}
|
||||
|
||||
@@ -495,6 +491,36 @@ std::unique_ptr<GradientBooster> CreateTrainedGBM(
|
||||
return gbm;
|
||||
}
|
||||
|
||||
/*!
 * \brief Copy the contents of a DMatrix into three flat output vectors.
 *
 * Every SparsePage batch produced by `dmat` is appended to one local page.
 * That page's row offsets, entry values and entry feature indices are then
 * copied out.
 *
 * \param dmat      Matrix to read; its batches are iterated once.
 * \param p_data    Receives `fvalue` of every entry; resized to `num_nonzero_`.
 * \param p_row_ptr Receives the page's row offsets; resized to `num_row_ + 1`.
 * \param p_cids    Receives `index` of every entry; resized to match `p_data`.
 */
void DMatrixToCSR(DMatrix *dmat, std::vector<float> *p_data,
                  std::vector<size_t> *p_row_ptr,
                  std::vector<bst_feature_t> *p_cids) {
  // Size the outputs up front from the matrix meta info.
  auto const &info = dmat->Info();
  p_data->resize(info.num_nonzero_);
  p_cids->resize(p_data->size());
  p_row_ptr->resize(info.num_row_ + 1);

  // Concatenate all batches into a single page so it can be read in one go.
  SparsePage merged;
  for (auto const &batch : dmat->GetBatches<SparsePage>()) {
    merged.Push(batch);
  }
  auto const &offsets = merged.offset.HostVector();
  auto const &entries = merged.data.HostVector();

  // Row offsets.
  CHECK_EQ(offsets.size(), p_row_ptr->size());
  std::copy(offsets.cbegin(), offsets.cend(), p_row_ptr->begin());

  // Entry values.
  ASSERT_EQ(entries.size(), p_data->size());
  for (size_t i = 0; i < entries.size(); ++i) {
    (*p_data)[i] = entries[i].fvalue;
  }

  // Entry feature (column) indices.
  ASSERT_EQ(entries.size(), p_cids->size());
  for (size_t i = 0; i < entries.size(); ++i) {
    (*p_cids)[i] = entries[i].index;
  }
}
|
||||
|
||||
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
|
||||
|
||||
using CUDAMemoryResource = rmm::mr::cuda_memory_resource;
|
||||
|
||||
Reference in New Issue
Block a user