From 8234091368770126f997a1f8d147e1b855243b39 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 23 Jun 2020 14:27:46 +0800 Subject: [PATCH] Remove unweighted GK quantile. (#5816) --- src/common/quantile.h | 155 ------------------------------------------ 1 file changed, 155 deletions(-) diff --git a/src/common/quantile.h b/src/common/quantile.h index 067b041bc..c0079ff8e 100644 --- a/src/common/quantile.h +++ b/src/common/quantile.h @@ -445,152 +445,6 @@ struct WXQSummary : public WQSummary { } } }; -/*! - * \brief traditional GK summary - */ -template -struct GKSummary { - /*! \brief an entry in the sketch summary */ - struct Entry { - /*! \brief minimum rank */ - RType rmin; - /*! \brief maximum rank */ - RType rmax; - /*! \brief the value of data */ - DType value; - // constructor - Entry() = default; - // constructor - Entry(RType rmin, RType rmax, DType value) - : rmin(rmin), rmax(rmax), value(value) {} - }; - /*! \brief input data queue before entering the summary */ - struct Queue { - // the input queue - std::vector queue; - // end of the queue - size_t qtail; - // push data to the queue - inline void Push(DType x, RType w) { - queue[qtail++] = x; - } - inline void MakeSummary(GKSummary *out) { - std::sort(queue.begin(), queue.begin() + qtail); - out->size = qtail; - for (size_t i = 0; i < qtail; ++i) { - out->data[i] = Entry(i + 1, i + 1, queue[i]); - } - } - }; - /*! \brief data field */ - Entry *data; - /*! \brief number of elements in the summary */ - size_t size; - GKSummary(Entry *data, size_t size) - : data(data), size(size) {} - /*! \brief the maximum error of the summary */ - inline RType MaxError() const { - RType res = 0; - for (size_t i = 1; i < size; ++i) { - res = std::max(data[i].rmax - data[i-1].rmin, res); - } - return res; - } - /*! \return maximum rank in the summary */ - inline RType MaxRank() const { - return data[size - 1].rmax; - } - /*! - * \brief copy content from src - * \param src source sketch - */ - inline void CopyFrom(const GKSummary &src) { - size = src.size; - std::memcpy(data, src.data, sizeof(Entry) * size); - } - inline void CheckValid(RType eps) const { - // assume always valid - } - /*! \brief used for debug purpose, print the summary */ - inline void Print() const { - for (size_t i = 0; i < size; ++i) { - LOG(CONSOLE) << "x=" << data[i].value << "\t" - << "[" << data[i].rmin << "," << data[i].rmax << "]"; - } - } - /*! - * \brief set current summary to be pruned summary of src - * assume data field is already allocated to be at least maxsize - * \param src source summary - * \param maxsize size we can afford in the pruned sketch - */ - inline void SetPrune(const GKSummary &src, size_t maxsize) { - if (src.size <= maxsize) { - this->CopyFrom(src); return; - } - const RType max_rank = src.MaxRank(); - this->size = maxsize; - data[0] = src.data[0]; - size_t n = maxsize - 1; - RType top = 1; - for (size_t i = 1; i < n; ++i) { - RType k = (i * max_rank) / n; - while (k > src.data[top + 1].rmax) ++top; - // assert src.data[top].rmin <= k - // because k > src.data[top].rmax >= src.data[top].rmin - if ((k - src.data[top].rmin) < (src.data[top+1].rmax - k)) { - data[i] = src.data[top]; - } else { - data[i] = src.data[top + 1]; - } - } - data[n] = src.data[src.size - 1]; - } - inline void SetCombine(const GKSummary &sa, - const GKSummary &sb) { - if (sa.size == 0) { - this->CopyFrom(sb); return; - } - if (sb.size == 0) { - this->CopyFrom(sa); return; - } - CHECK(sa.size > 0 && sb.size > 0) << "invalid input for merge"; - const Entry *a = sa.data, *a_end = sa.data + sa.size; - const Entry *b = sb.data, *b_end = sb.data + sb.size; - this->size = sa.size + sb.size; - RType aprev_rmin = 0, bprev_rmin = 0; - Entry *dst = this->data; - while (a != a_end && b != b_end) { - if (a->value < b->value) { - *dst = Entry(bprev_rmin + a->rmin, - a->rmax + b->rmax - 1, a->value); - aprev_rmin = a->rmin; - ++dst; ++a; - } else { - *dst = Entry(aprev_rmin + b->rmin, - b->rmax + a->rmax - 1, b->value); - bprev_rmin = b->rmin; - ++dst; ++b; - } - } - if (a != a_end) { - RType bprev_rmax = (b_end - 1)->rmax; - do { - *dst = Entry(bprev_rmin + a->rmin, bprev_rmax + a->rmax, a->value); - ++dst; ++a; - } while (a != a_end); - } - if (b != b_end) { - RType aprev_rmax = (a_end - 1)->rmax; - do { - *dst = Entry(aprev_rmin + b->rmin, aprev_rmax + b->rmax, b->value); - ++dst; ++b; - } while (b != b_end); - } - CHECK(dst == data + size) << "bug in combine"; - } -}; - /*! * \brief template for all quantile sketch algorithm * that uses merge/prune scheme @@ -841,15 +695,6 @@ template class WXQuantileSketch : public QuantileSketchTemplate > { }; -/*! - * \brief Quantile sketch use WQSummary - * \tparam DType type of data content - * \tparam RType type of rank - */ -template -class GKQuantileSketch : - public QuantileSketchTemplate > { -}; } // namespace common } // namespace xgboost #endif // XGBOOST_COMMON_QUANTILE_H_