From 5561dd9cb0954e8d8799abb279c03119c910d4f8 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 9 Nov 2014 21:09:07 -0800 Subject: [PATCH] fix bug in queue2summary --- src/utils/quantile.h | 73 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 37 deletions(-) diff --git a/src/utils/quantile.h b/src/utils/quantile.h index b46a89d7d..aa5edb2f6 100644 --- a/src/utils/quantile.h +++ b/src/utils/quantile.h @@ -59,8 +59,7 @@ class WQuantileSketch { /*! \brief number of elements in the summary */ size_t size; // constructor - Summary(void) : size(0) { - } + Summary(void) : size(0) {} /*! * \brief the maximum error of the Summary */ @@ -118,12 +117,12 @@ class WQuantileSketch { // lastidx is used to avoid duplicated records size_t i = 0, lastidx = 0; for (RType k = 1; k < n; ++k) { - RType d2 = (k * max_rank) / n * 2; + RType dx2 = (k * max_rank) / n * 2; // find first i such that d < (rmax[i+1] + rmin[i+1]) / 2 while (i < src.size - 1 && - d2 < src.data[i + 1].rmax + src.data[i + 1].rmin) ++i; + dx2 < src.data[i + 1].rmax + src.data[i + 1].rmin) ++i; if (i == src.size - 1) break; - if (d2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) { + if (dx2 < src.data[i].rmin_next() + src.data[i + 1].rmax_prev()) { if (i != lastidx) { data[size++] = src.data[i]; lastidx = i; } @@ -155,13 +154,13 @@ class WQuantileSketch { bprev_rmin = b->rmin_next(); ++dst; ++a; ++b; } else if (a->value < b->value) { - *dst = Entry(bprev_rmin + a->rmin, + *dst = Entry(a->rmin + bprev_rmin, a->rmax + b->rmax_prev(), a->wmin, a->value); aprev_rmin = a->rmin_next(); ++dst; ++a; } else { - *dst = Entry(aprev_rmin + b->rmin, + *dst = Entry(b->rmin + aprev_rmin, b->rmax + a->rmax_prev(), b->wmin, b->value); bprev_rmin = b->rmin_next(); @@ -171,14 +170,14 @@ class WQuantileSketch { if (a != a_end) { RType brmax = (b_end - 1)->rmax; do { - *dst = Entry(bprev_rmin + a->rmin, brmax + a->rmax, a->wmin, a->value); + *dst = Entry(a->rmin + bprev_rmin, a->rmax + brmax, a->wmin, a->value); ++dst; ++a; } while (a != a_end); } if (b != b_end) { RType armax = (a_end - 1)->rmax; do { - *dst = Entry(aprev_rmin + b->rmin, armax + b->rmax, b->wmin, b->value); + *dst = Entry(b->rmin + aprev_rmin, b->rmax + armax, b->wmin, b->value); ++dst; ++b; } while (b != b_end); } @@ -236,12 +235,12 @@ class WQuantileSketch { ++b; } nlevel += 1; - level_batch = (b + 1) / 2 + 1; + limit_size = (b + 1) / 2 + 1; // lazy reserve the space, if there is only one value, no need to allocate space inqueue.resize(1); - data.resize(0); - level.resize(0); qtail = 0; + data.clear(); + level.clear(); } /*! * \brief add an element to a sketch @@ -249,22 +248,24 @@ class WQuantileSketch { */ inline void Add(DType x, RType w = 1) { if (qtail == inqueue.size()) { - // jump from lazy one value to level_batch * 2 + // jump from lazy one value to limit_size * 2 if (inqueue.size() == 1) { - inqueue.resize(level_batch * 2); + inqueue.resize(limit_size * 2); } else { - temp.Reserve(2 * level_batch); + temp.Reserve(limit_size * 2); this->Queue2Summary(&temp); + // cleanup queue + qtail = 0; for (size_t l = 1; true; ++l) { this->InitLevel(std::max(l + 1, nlevel)); // check if level l is empty if (level[l].size == 0) { - level[l].SetPrune(temp, level_batch); break; + level[l].SetPrune(temp, limit_size); break; } else { // level 0 is actually temp space - level[0].SetPrune(temp, level_batch); + level[0].SetPrune(temp, limit_size); temp.SetCombine(level[0], level[l]); - if (temp.size > level_batch) { + if (temp.size > limit_size) { // try next level level[l].size = 0; } else { @@ -284,18 +285,18 @@ class WQuantileSketch { /*! \brief get the summary after finalize */ inline void GetSummary(SummaryContainer *out) { if (level.size() != 0) { - out->Reserve(level_batch * 2); + out->Reserve(limit_size * 2); } this->Queue2Summary(out); if (level.size() != 0) { - level[0].SetPrune(*out, level_batch); + level[0].SetPrune(*out, limit_size); for (size_t l = 1; l < level.size(); ++l) { if (level[l].size == 0) continue; if (level[0].size == 0) { level[0].CopyFrom(level[l]); } else { out->SetCombine(level[0], level[l]); - level[0].SetPrune(*out, level_batch); + level[0].SetPrune(*out, limit_size); } } out->CopyFrom(level[0]); @@ -306,10 +307,10 @@ class WQuantileSketch { // initialize level space to at least nlevel inline void InitLevel(size_t nlevel) { if (level.size() >= nlevel) return; - data.resize(level_batch * nlevel); + data.resize(limit_size * nlevel); level.resize(nlevel, Summary()); for (size_t l = 0; l < level.size(); ++l) { - level[l].data = BeginPtr(data) + l * level_batch; + level[l].data = BeginPtr(data) + l * limit_size; } } inline void Queue2Summary(SummaryContainer *temp) { @@ -321,7 +322,7 @@ class WQuantileSketch { RType wsum = 0; // construct data with unique weights for (size_t i = 0; i < qtail;) { - size_t j = 1; + size_t j = i + 1; RType w = inqueue[i].weight; while (j < qtail && inqueue[j].value == inqueue[i].value) { w += inqueue[j].weight; ++j; @@ -329,8 +330,6 @@ class WQuantileSketch { temp->data[temp->size++] = Entry(wsum, wsum + w, w, inqueue[i].value); wsum += w; i = j; } - // clean up queue - qtail = 0; } // entry in the queue struct QEntry { @@ -355,7 +354,7 @@ class WQuantileSketch { // number of levels size_t nlevel; // size of summary in each level - size_t level_batch; + size_t limit_size; // the level of each summaries std::vector level; // content of the summary @@ -531,12 +530,12 @@ class GKQuantileSketch { } L += 1; inqueue.resize(b); - level_batch = (b + 1) / 2 + 1; - temp.Reserve(level_batch * 2); - data.resize(level_batch * L); + limit_size = (b + 1) / 2 + 1; + temp.Reserve(limit_size * 2); + data.resize(limit_size * L); for (size_t l = 0; l < L; ++l) { Summary s; s.size = 0; - s.data = BeginPtr(data) + l * level_batch; + s.data = BeginPtr(data) + l * limit_size; level.push_back(s); } qtail = 0; @@ -559,11 +558,11 @@ class GKQuantileSketch { for (size_t l = 1; l < level.size(); ++l) { // check if level l is empty if (level[l].size == 0) { - level[l].SetPrune(temp, level_batch); + level[l].SetPrune(temp, limit_size); return; } else { // level 0 is actually temp space - level[0].SetPrune(temp, level_batch); + level[0].SetPrune(temp, limit_size); temp.SetCombine(level[0], level[l]); level[l].size = 0; } @@ -583,10 +582,10 @@ class GKQuantileSketch { temp.data[i] = Entry(i + 1, i + 1, inqueue[i]); } temp.size = static_cast(qtail); - if (temp.size < level_batch) { + if (temp.size < limit_size) { level[0].CopyFrom(temp); } else { - level[0].SetPrune(temp, level_batch); + level[0].SetPrune(temp, limit_size); } // start adding other things in for (size_t l = 1; l < level.size(); ++l) { @@ -595,7 +594,7 @@ class GKQuantileSketch { level[0].CopyFrom(level[l]); } else { temp.SetCombine(level[0], level[l]); - level[0].SetPrune(temp, level_batch); + level[0].SetPrune(temp, limit_size); } level[l].size = 0; } @@ -611,7 +610,7 @@ class GKQuantileSketch { // end of the queue size_t qtail; // size of summary in each level - size_t level_batch; + size_t limit_size; // content of the summary std::vector data; // different level of summary