Extract Sketch Entry from hist maker. (#7503)
* Extract Sketch Entry from hist maker. * Add a new sketch container for sorted inputs. * Optimize bin search.
This commit is contained in:
@@ -369,92 +369,7 @@ class BaseMaker: public TreeUpdater {
|
||||
}
|
||||
}
|
||||
}
|
||||
/*! \brief common helper data structure to build sketch */
|
||||
struct SketchEntry {
|
||||
/*! \brief total sum of amount to be met */
|
||||
double sum_total;
|
||||
/*! \brief statistics used in the sketch */
|
||||
double rmin, wmin;
|
||||
/*! \brief last seen feature value */
|
||||
bst_float last_fvalue;
|
||||
/*! \brief current size of sketch */
|
||||
double next_goal;
|
||||
// pointer to the sketch to put things in
|
||||
common::WXQuantileSketch<bst_float, bst_float> *sketch;
|
||||
// initialize the space
|
||||
inline void Init(unsigned max_size) {
|
||||
next_goal = -1.0f;
|
||||
rmin = wmin = 0.0f;
|
||||
sketch->temp.Reserve(max_size + 1);
|
||||
sketch->temp.size = 0;
|
||||
}
|
||||
/*!
|
||||
* \brief push a new element to sketch
|
||||
* \param fvalue feature value, comes in sorted ascending order
|
||||
* \param w weight
|
||||
* \param max_size
|
||||
*/
|
||||
inline void Push(bst_float fvalue, bst_float w, unsigned max_size) {
|
||||
if (next_goal == -1.0f) {
|
||||
next_goal = 0.0f;
|
||||
last_fvalue = fvalue;
|
||||
wmin = w;
|
||||
return;
|
||||
}
|
||||
if (last_fvalue != fvalue) {
|
||||
double rmax = rmin + wmin;
|
||||
if (rmax >= next_goal && sketch->temp.size != max_size) {
|
||||
if (sketch->temp.size == 0 ||
|
||||
last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
|
||||
// push to sketch
|
||||
sketch->temp.data[sketch->temp.size] =
|
||||
common::WXQuantileSketch<bst_float, bst_float>::
|
||||
Entry(static_cast<bst_float>(rmin),
|
||||
static_cast<bst_float>(rmax),
|
||||
static_cast<bst_float>(wmin), last_fvalue);
|
||||
CHECK_LT(sketch->temp.size, max_size)
|
||||
<< "invalid maximum size max_size=" << max_size
|
||||
<< ", stemp.size" << sketch->temp.size;
|
||||
++sketch->temp.size;
|
||||
}
|
||||
if (sketch->temp.size == max_size) {
|
||||
next_goal = sum_total * 2.0f + 1e-5f;
|
||||
} else {
|
||||
next_goal = static_cast<bst_float>(sketch->temp.size * sum_total / max_size);
|
||||
}
|
||||
} else {
|
||||
if (rmax >= next_goal) {
|
||||
LOG(TRACKER) << "INFO: rmax=" << rmax
|
||||
<< ", sum_total=" << sum_total
|
||||
<< ", naxt_goal=" << next_goal
|
||||
<< ", size=" << sketch->temp.size;
|
||||
}
|
||||
}
|
||||
rmin = rmax;
|
||||
wmin = w;
|
||||
last_fvalue = fvalue;
|
||||
} else {
|
||||
wmin += w;
|
||||
}
|
||||
}
|
||||
/*! \brief push final unfinished value to the sketch */
|
||||
inline void Finalize(unsigned max_size) {
|
||||
double rmax = rmin + wmin;
|
||||
if (sketch->temp.size == 0 || last_fvalue > sketch->temp.data[sketch->temp.size-1].value) {
|
||||
CHECK_LE(sketch->temp.size, max_size)
|
||||
<< "Finalize: invalid maximum size, max_size=" << max_size
|
||||
<< ", stemp.size=" << sketch->temp.size;
|
||||
// push to sketch
|
||||
sketch->temp.data[sketch->temp.size] =
|
||||
common::WXQuantileSketch<bst_float, bst_float>::
|
||||
Entry(static_cast<bst_float>(rmin),
|
||||
static_cast<bst_float>(rmax),
|
||||
static_cast<bst_float>(wmin), last_fvalue);
|
||||
++sketch->temp.size;
|
||||
}
|
||||
sketch->PushTemp();
|
||||
}
|
||||
};
|
||||
/*! \brief SketchEntry is an alias of common::SortedQuantile (declared outside this view) */
using SketchEntry = common::SortedQuantile;
|
||||
/*! \brief training parameter of tree grower */
|
||||
TrainParam param_;
|
||||
/*! \brief queue of nodes to be expanded */
|
||||
|
||||
Reference in New Issue
Block a user