Rewrite sparse dmatrix using callbacks. (#7092)

- Reduce dependency on dmlc parsers and provide an interface for users to load data by themselves.
- Remove use of threaded iterator and IO queue.
- Remove `page_size`.
- Make sure the number of pages in memory is bounded.
- Make sure the cache cannot be violated.
- Provide an interface for internal algorithms to process data asynchronously.
This commit is contained in:
Jiaming Yuan
2021-07-16 12:33:31 +08:00
committed by GitHub
parent 2f524e9f41
commit bd1f3a38f0
51 changed files with 1445 additions and 1391 deletions

View File

@@ -163,7 +163,7 @@ class DeviceHistogram {
template <typename GradientSumT>
struct GPUHistMakerDevice {
int device_id;
EllpackPageImpl* page;
EllpackPageImpl const* page;
common::Span<FeatureType const> feature_types;
BatchParam batch_param;
@@ -199,7 +199,7 @@ struct GPUHistMakerDevice {
dh::caching_device_vector<uint32_t> node_categories;
GPUHistMakerDevice(int _device_id,
EllpackPageImpl* _page,
EllpackPageImpl const* _page,
common::Span<FeatureType const> _feature_types,
bst_uint _n_rows,
TrainParam _param,
@@ -488,7 +488,7 @@ struct GPUHistMakerDevice {
}
}
void FinalisePositionInPage(EllpackPageImpl *page,
void FinalisePositionInPage(EllpackPageImpl const *page,
const common::Span<RegTree::Node> d_nodes,
common::Span<FeatureType const> d_feature_types,
common::Span<uint32_t const> categories,
@@ -812,7 +812,6 @@ class GPUHistMakerSpecialised {
BatchParam batch_param{
device_,
param_.max_bin,
generic_param_->gpu_page_size
};
auto page = (*dmat->GetBatches<EllpackPage>(batch_param).begin()).Impl();
dh::safe_cuda(cudaSetDevice(device_));