Added finding quantiles on GPU. (#3393)

* Added finding quantiles on GPU.

- this includes datasets where weights are assigned to data rows
- as the quantiles found by the new algorithm are not the same
  as those found by the old one, test thresholds in
    tests/python-gpu/test_gpu_updaters.py have been adjusted.

* Adjustments and improved testing for finding quantiles on the GPU.

- added C++ tests for the DeviceSketch() function
- reduced one of the thresholds in test_gpu_updaters.py
- adjusted the cuts found by the find_cuts_k kernel
This commit is contained in:
Andy Adinets
2018-07-27 04:03:16 +02:00
committed by Rory Mitchell
parent e2f09db77a
commit cc6a5a3666
14 changed files with 691 additions and 116 deletions

View File

@@ -35,9 +35,9 @@ struct WQSummary {
/*! \brief the value of data */
DType value;
// constructor
Entry() = default;
XGBOOST_DEVICE Entry() {} // NOLINT
// constructor
Entry(RType rmin, RType rmax, RType wmin, DType value)
XGBOOST_DEVICE Entry(RType rmin, RType rmax, RType wmin, DType value)
: rmin(rmin), rmax(rmax), wmin(wmin), value(value) {}
/*!
* \brief debug function, check Valid
@@ -48,11 +48,11 @@ struct WQSummary {
CHECK(rmax- rmin - wmin > -eps) << "relation constraint: min/max";
}
/*! \return rmin estimation for v strictly bigger than value */
inline RType RMinNext() const {
XGBOOST_DEVICE inline RType RMinNext() const {
return rmin + wmin;
}
/*! \return rmax estimation for v strictly smaller than value */
inline RType RMaxPrev() const {
XGBOOST_DEVICE inline RType RMaxPrev() const {
return rmax - wmin;
}
};
@@ -158,6 +158,17 @@ struct WQSummary {
size = src.size;
std::memcpy(data, src.data, sizeof(Entry) * size);
}
inline void MakeFromSorted(const Entry* entries, size_t n) {
size = 0;
for (size_t i = 0; i < n;) {
size_t j = i + 1;
// ignore repeated values
for (; j < n && entries[j].value == entries[i].value; ++j) {}
data[size++] = Entry(entries[i].rmin, entries[i].rmax, entries[i].wmin,
entries[i].value);
i = j;
}
}
/*!
* \brief debug function, validate whether the summary
* run consistency check to check if it is a valid summary
@@ -676,6 +687,18 @@ class QuantileSketchTemplate {
* \param eps accuracy level of summary
*/
inline void Init(size_t maxn, double eps) {
// compute the number of levels and the per-level size limit for (maxn, eps);
// the results are written into the nlevel and limit_size members
LimitSizeLevel(maxn, eps, &nlevel, &limit_size);
// Reserve space lazily: start the input queue with a single slot, so that
// nothing more needs to be allocated if only one value is ever pushed.
inqueue.queue.resize(1);
inqueue.qtail = 0;
// drop any contents left in the sketch storage
data.clear();
level.clear();
}
inline static void LimitSizeLevel
(size_t maxn, double eps, size_t* out_nlevel, size_t* out_limit_size) {
size_t& nlevel = *out_nlevel;
size_t& limit_size = *out_limit_size;
nlevel = 1;
while (true) {
limit_size = static_cast<size_t>(ceil(nlevel / eps)) + 1;
@@ -687,12 +710,8 @@ class QuantileSketchTemplate {
size_t n = (1ULL << nlevel);
CHECK(n * limit_size >= maxn) << "invalid init parameter";
CHECK(nlevel <= limit_size * eps) << "invalid init parameter";
// lazy reserve the space, if there is only one value, no need to allocate space
inqueue.queue.resize(1);
inqueue.qtail = 0;
data.clear();
level.clear();
}
/*!
* \brief add an element to a sketch
* \param x The element added to the sketch
@@ -714,6 +733,13 @@ class QuantileSketchTemplate {
}
inqueue.Push(x, w);
}
/*!
 * \brief push an already-built summary into the sketch
 *
 *  The summary is pruned into the temp buffer with a size bound of
 *  limit_size * 2, and the temp buffer is then pushed up via PushTemp().
 * \param summary the summary to be added
 */
inline void PushSummary(const Summary& summary) {
  const size_t temp_capacity = limit_size * 2;
  // make sure temp can hold the pruned copy before writing into it
  temp.Reserve(temp_capacity);
  temp.SetPrune(summary, temp_capacity);
  PushTemp();
}
/*! \brief push up temp */
inline void PushTemp() {
temp.Reserve(limit_size * 2);