Rewrite sparse dmatrix using callbacks. (#7092)

- Reduce dependency on dmlc parsers and provide an interface for users to load data by themselves.
- Remove use of threaded iterator and IO queue.
- Remove `page_size`.
- Make sure the number of pages in memory is bounded.
- Make sure the cache cannot be violated.
- Provide an interface for internal algorithms to process data asynchronously.
This commit is contained in:
Jiaming Yuan
2021-07-16 12:33:31 +08:00
committed by GitHub
parent 2f524e9f41
commit bd1f3a38f0
51 changed files with 1445 additions and 1391 deletions

View File

@@ -163,7 +163,7 @@ class DeviceHistogram {
template <typename GradientSumT>
struct GPUHistMakerDevice {
int device_id;
EllpackPageImpl* page;
EllpackPageImpl const* page;
common::Span<FeatureType const> feature_types;
BatchParam batch_param;
@@ -199,7 +199,7 @@ struct GPUHistMakerDevice {
dh::caching_device_vector<uint32_t> node_categories;
GPUHistMakerDevice(int _device_id,
EllpackPageImpl* _page,
EllpackPageImpl const* _page,
common::Span<FeatureType const> _feature_types,
bst_uint _n_rows,
TrainParam _param,
@@ -488,7 +488,7 @@ struct GPUHistMakerDevice {
}
}
void FinalisePositionInPage(EllpackPageImpl *page,
void FinalisePositionInPage(EllpackPageImpl const *page,
const common::Span<RegTree::Node> d_nodes,
common::Span<FeatureType const> d_feature_types,
common::Span<uint32_t const> categories,
@@ -812,7 +812,6 @@ class GPUHistMakerSpecialised {
BatchParam batch_param{
device_,
param_.max_bin,
generic_param_->gpu_page_size
};
auto page = (*dmat->GetBatches<EllpackPage>(batch_param).begin()).Impl();
dh::safe_cuda(cudaSetDevice(device_));