- training with external memory part 1 of 2 (#4486)

* - training with external memory part 1 of 2
   - this pr focuses on computing the quantiles using multiple gpus on a
     dataset that uses the external cache capabilities
   - there will be a follow-up pr soon after this that will support creation
     of histogram indices on large datasets as well
   - both of these changes are required to support training with external memory
   - the sparse pages in dmatrix are taken in batches and the cut matrices
     are incrementally built
   - also snuck in some (perf) changes related to sketch aggregation amongst multiple
     features across multiple sparse page batches. instead of aggregating the summary
     inside each device and merging it later, it is aggregated in-place when the device
     is working on different rows but the same feature
This commit is contained in:
sriramch
2019-05-29 13:18:34 -07:00
committed by Rory Mitchell
parent 6e16900711
commit fed665ae8a
4 changed files with 180 additions and 88 deletions

View File

@@ -1374,7 +1374,7 @@ inline void DeviceShard<GradientSumT>::CreateHistIndices(
}
template <typename GradientSumT>
class GPUHistMakerSpecialised{
class GPUHistMakerSpecialised {
public:
GPUHistMakerSpecialised() : initialised_{false}, p_last_fmat_{nullptr} {}
void Init(const std::vector<std::pair<std::string, std::string>>& args,
@@ -1449,10 +1449,12 @@ class GPUHistMakerSpecialised{
// Find the cuts.
monitor_.StartCuda("Quantiles");
common::DeviceSketch(batch, *info_, param_, &hmat_, hist_maker_param_.gpu_batch_nrows,
GPUSet::All(learner_param_->gpu_id, learner_param_->n_gpus));
// TODO(sriramch): The return value will be used when we add support for histogram
// index creation for multiple batches
common::DeviceSketch(param_, *learner_param_, hist_maker_param_.gpu_batch_nrows, dmat, &hmat_);
n_bins_ = hmat_.row_ptr.back();
monitor_.StopCuda("Quantiles");
auto is_dense = info_->num_nonzero_ == info_->num_row_ * info_->num_col_;
monitor_.StartCuda("BinningCompression");
@@ -1557,7 +1559,6 @@ class GPUHistMakerSpecialised{
GPUHistMakerTrainParam hist_maker_param_;
LearnerTrainParam const* learner_param_;
common::GHistIndexMatrix gmat_;
dh::AllReducer reducer_;