Added finding quantiles on GPU. (#3393)

* Added finding quantiles on GPU.
  - This includes datasets where weights are assigned to data rows.
  - As the quantiles found by the new algorithm are not the same as those found by the old one, the test thresholds in tests/python-gpu/test_gpu_updaters.py have been adjusted.
* Adjustments and improved testing for finding quantiles on the GPU.
  - Added C++ tests for the DeviceSketch() function.
  - Reduced one of the thresholds in test_gpu_updaters.py.
  - Adjusted the cuts found by the find_cuts_k kernel.
2018-07-27 04:03:16 +02:00
parent e2f09db77a
commit cc6a5a3666
14 changed files with 691 additions and 116 deletions
--- a/src/common/hist_util.cc
+++ b/src/common/hist_util.cc
@@ -43,18 +43,28 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
      auto tid = static_cast<unsigned>(omp_get_thread_num());
      unsigned begin = std::min(nstep * tid, ncol);
      unsigned end = std::min(nstep * (tid + 1), ncol);
-      for (size_t i = 0; i < batch.Size(); ++i) { // NOLINT(*)
-        size_t ridx = batch.base_rowid + i;
-        SparsePage::Inst inst = batch[i];
-        for (bst_uint j = 0; j < inst.length; ++j) {
-          if (inst[j].index >= begin && inst[j].index < end) {
-            sketchs[inst[j].index].Push(inst[j].fvalue, info.GetWeight(ridx));
+      // do not iterate if no columns are assigned to the thread
+      if (begin < end && end <= ncol) {
+        for (size_t i = 0; i < batch.Size(); ++i) { // NOLINT(*)
+          size_t ridx = batch.base_rowid + i;
+          SparsePage::Inst inst = batch[i];
+          for (bst_uint j = 0; j < inst.length; ++j) {
+            if (inst[j].index >= begin && inst[j].index < end) {
+              sketchs[inst[j].index].Push(inst[j].fvalue, info.GetWeight(ridx));
+            }
          }
        }
      }
    }
  }

+  Init(&sketchs, max_num_bins);
+}
+
+void HistCutMatrix::Init
+(std::vector<WXQSketch>* in_sketchs, uint32_t max_num_bins) {
+  std::vector<WXQSketch>& sketchs = *in_sketchs;
+  constexpr int kFactor = 8;
  // gather the histogram data
  rabit::SerializeReducer<WXQSketch::SummaryContainer> sreducer;
  std::vector<WXQSketch::SummaryContainer> summary_array;
@@ -68,7 +78,7 @@ void HistCutMatrix::Init(DMatrix* p_fmat, uint32_t max_num_bins) {
  size_t nbytes = WXQSketch::SummaryContainer::CalcMemCost(max_num_bins * kFactor);
  sreducer.Allreduce(dmlc::BeginPtr(summary_array), nbytes, summary_array.size());

-  this->min_val.resize(info.num_col_);
+  this->min_val.resize(sketchs.size());
  row_ptr.push_back(0);
  for (size_t fid = 0; fid < summary_array.size(); ++fid) {
    WXQSketch::SummaryContainer a;