- training with external memory part 1 of 2 (#4486)

* - training with external memory part 1 of 2 - this pr focuses on computing the quantiles using multiple gpus on a dataset that uses the external cache capabilities - there will be a follow-up pr soon after this that will support creation of histogram indices on large datasets as well - both of these changes are required to support training with external memory - the sparse pages in dmatrix are taken in batches and the cut matrices are incrementally built - also snuck in some (perf) changes related to sketches aggregation amongst multiple features across multiple sparse page batches. instead of aggregating the summary inside each device and merging it later, it is aggregated in-place when the device is working on different rows but the same feature
2019-05-29 13:18:34 -07:00
parent 6e16900711
commit fed665ae8a
4 changed files with 180 additions and 88 deletions
--- a/src/tree/updater_gpu_hist.cu
+++ b/src/tree/updater_gpu_hist.cu
@@ -1374,7 +1374,7 @@ inline void DeviceShard<GradientSumT>::CreateHistIndices(
 }

 template <typename GradientSumT>
-class GPUHistMakerSpecialised{
+class GPUHistMakerSpecialised {
 public:
  GPUHistMakerSpecialised() : initialised_{false}, p_last_fmat_{nullptr} {}
  void Init(const std::vector<std::pair<std::string, std::string>>& args,
@@ -1449,10 +1449,12 @@ class GPUHistMakerSpecialised{

    // Find the cuts.
    monitor_.StartCuda("Quantiles");
-    common::DeviceSketch(batch, *info_, param_, &hmat_, hist_maker_param_.gpu_batch_nrows,
-                         GPUSet::All(learner_param_->gpu_id, learner_param_->n_gpus));
+    // TODO(sriramch): The return value will be used when we add support for histogram
+    // index creation for multiple batches
+    common::DeviceSketch(param_, *learner_param_, hist_maker_param_.gpu_batch_nrows, dmat, &hmat_);
    n_bins_ = hmat_.row_ptr.back();
    monitor_.StopCuda("Quantiles");
+
    auto is_dense = info_->num_nonzero_ == info_->num_row_ * info_->num_col_;

    monitor_.StartCuda("BinningCompression");
@@ -1557,7 +1559,6 @@ class GPUHistMakerSpecialised{

  GPUHistMakerTrainParam hist_maker_param_;
  LearnerTrainParam const* learner_param_;
-  common::GHistIndexMatrix gmat_;

  dh::AllReducer reducer_;