Added finding quantiles on GPU. (#3393)

* Added finding quantiles on GPU.
  - this includes datasets where weights are assigned to data rows
  - as the quantiles found by the new algorithm are not the same as those found by the old one, test thresholds in tests/python-gpu/test_gpu_updaters.py have been adjusted.
* Adjustments and improved testing for finding quantiles on the GPU.
  - added C++ tests for the DeviceSketch() function
  - reduced one of the thresholds in test_gpu_updaters.py
  - adjusted the cuts found by the find_cuts_k kernel
2018-07-27 04:03:16 +02:00
parent e2f09db77a
commit cc6a5a3666
14 changed files with 691 additions and 116 deletions
--- a/src/common/hist_util.h
+++ b/src/common/hist_util.h
@@ -12,8 +12,11 @@
 #include <vector>
 #include "row_set.h"
 #include "../tree/fast_hist_param.h"
+#include "../tree/param.h"
+#include "./quantile.h"

 namespace xgboost {
+
 namespace common {

 using tree::FastHistParam;
@@ -77,11 +80,20 @@ struct HistCutMatrix {
    return {dmlc::BeginPtr(cut) + row_ptr[fid],
                       row_ptr[fid + 1] - row_ptr[fid]};
  }
+
+  using WXQSketch = common::WXQuantileSketch<bst_float, bst_float>;
+
  // create histogram cut matrix given statistics from data
  // using approximate quantile sketch approach
  void Init(DMatrix* p_fmat, uint32_t max_num_bins);
+
+  void Init(std::vector<WXQSketch>* sketchs, uint32_t max_num_bins);
 };

+/*! \brief Builds the cut matrix on the GPU */
+void DeviceSketch
+  (const SparsePage& batch, const MetaInfo& info,
+   const tree::TrainParam& param, HistCutMatrix* hmat);

 /*!
 * \brief A single row in global histogram index.