fix quantile for edge case, make logloss evaluation capped for extreme values
This commit is contained in:
parent
d53e642b5d
commit
53c9a7b66b
@ -83,7 +83,15 @@ struct EvalLogLoss : public EvalEWiseBase<EvalLogLoss> {
|
|||||||
return "logloss";
|
return "logloss";
|
||||||
}
|
}
|
||||||
/*!
 * \brief evaluate -y * log(py) - (1 - y) * log(1 - py) for one row,
 *  substituting a small floor for py or (1 - py) when either falls below it
 * \param y first argument, weights the log(py) term
 * \param py second argument, the value whose logarithm is taken
 * \return the capped loss value
 */
inline static float EvalRow(float y, float py) {
  const float kFloor = 1e-16f;
  const float complement = 1.0f - py;
  float log_pos;
  float log_neg;
  if (py < kFloor) {
    // py is too small to take its log directly: use the floor instead
    log_pos = std::log(kFloor);
    log_neg = std::log(1.0f - kFloor);
  } else if (complement < kFloor) {
    // mirror case: 1 - py is too small
    log_pos = std::log(1.0f - kFloor);
    log_neg = std::log(kFloor);
  } else {
    log_pos = std::log(py);
    log_neg = std::log(complement);
  }
  return -y * log_pos - (1.0f - y) * log_neg;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -43,6 +43,26 @@ inline static int FindMaxIndex(const std::vector<float>& rec) {
|
|||||||
return FindMaxIndex(BeginPtr(rec), rec.size());
|
return FindMaxIndex(BeginPtr(rec), rec.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
 * \brief numerically safe evaluation of log(exp(x) + exp(y))
 *
 *  The larger operand is factored out so the remaining exponent is never
 *  positive and cannot overflow.
 * \param x first value in log space
 * \param y second value in log space
 * \return log(exp(x) + exp(y)); when both operands are the same infinity
 *  the result is that infinity rather than NaN
 */
inline float LogSum(float x, float y) {
  const float hi = (x < y) ? y : x;
  const float lo = (x < y) ? x : y;
  if (hi == lo) {
    // Equal operands give exp(0) + 1 == 2 exactly. Handling them here also
    // avoids inf - inf = NaN when both operands are the same infinity.
    return hi + std::log(2.0f);
  }
  return hi + std::log(std::exp(lo - hi) + 1.0f);
}
|
||||||
|
/*!
 * \brief numerically safe evaluation of log(sum_i exp(rec[i]))
 *
 *  The maximum element is subtracted before exponentiation so that no
 *  exponent is positive.
 * \param rec pointer to the values in log space; not read when size is 0
 * \param size number of values in rec
 * \return the log of the summed exponentials; -infinity (the log of an
 *  empty sum) when size is 0
 */
inline float LogSum(const float *rec, size_t size) {
  if (size == 0) {
    // log(0) evaluates to -infinity; returning early avoids reading rec[0]
    return std::log(0.0f);
  }
  float mx = rec[0];
  for (size_t i = 1; i < size; ++i) {
    mx = std::max(mx, rec[i]);
  }
  float sum = 0.0f;
  for (size_t i = 0; i < size; ++i) {
    // An element equal to the maximum contributes exp(0) == 1 exactly.
    // Testing for it avoids inf - inf = NaN when the maximum is infinite.
    sum += (rec[i] == mx) ? 1.0f : std::exp(rec[i] - mx);
  }
  return mx + std::log(sum);
}
|
||||||
|
|
||||||
inline static bool CmpFirst(const std::pair<float, unsigned> &a,
|
inline static bool CmpFirst(const std::pair<float, unsigned> &a,
|
||||||
const std::pair<float, unsigned> &b) {
|
const std::pair<float, unsigned> &b) {
|
||||||
|
|||||||
@ -297,6 +297,14 @@ struct WXQSummary : public WQSummary<DType, RType> {
|
|||||||
RType begin = src.data[0].rmax;
|
RType begin = src.data[0].rmax;
|
||||||
size_t n = maxsize - 1, nbig = 0;
|
size_t n = maxsize - 1, nbig = 0;
|
||||||
const RType range = src.data[src.size - 1].rmin - begin;
|
const RType range = src.data[src.size - 1].rmin - begin;
|
||||||
|
// prune off zero weights
|
||||||
|
if (range == 0) {
|
||||||
|
// special case: contains only two effective data points
|
||||||
|
this->data[0] = src.data[0];
|
||||||
|
this->data[1] = src.data[src.size - 1];
|
||||||
|
this->size = 2;
|
||||||
|
return;
|
||||||
|
}
|
||||||
const RType chunk = 2 * range / n;
|
const RType chunk = 2 * range / n;
|
||||||
// minimized range
|
// minimized range
|
||||||
RType mrange = 0;
|
RType mrange = 0;
|
||||||
@ -323,9 +331,9 @@ struct WXQSummary : public WQSummary<DType, RType> {
|
|||||||
src.size, maxsize, static_cast<double>(range),
|
src.size, maxsize, static_cast<double>(range),
|
||||||
static_cast<double>(chunk));
|
static_cast<double>(chunk));
|
||||||
for (size_t i = 0; i < src.size; ++i) {
|
for (size_t i = 0; i < src.size; ++i) {
|
||||||
printf("[%lu] rmin=%g, rmax=%g, wmin=%g, isbig=%d\n", i,
|
printf("[%lu] rmin=%g, rmax=%g, wmin=%g, v=%g, isbig=%d\n", i,
|
||||||
src.data[i].rmin, src.data[i].rmax, src.data[i].wmin,
|
src.data[i].rmin, src.data[i].rmax, src.data[i].wmin,
|
||||||
CheckLarge(src.data[i], chunk));
|
src.data[i].value, CheckLarge(src.data[i], chunk));
|
||||||
}
|
}
|
||||||
utils::Assert(nbig < n - 1, "quantile: too many large chunk");
|
utils::Assert(nbig < n - 1, "quantile: too many large chunk");
|
||||||
}
|
}
|
||||||
@ -631,6 +639,7 @@ class QuantileSketchTemplate {
|
|||||||
* \param x the element added to the sketch
|
* \param x the element added to the sketch
|
||||||
*/
|
*/
|
||||||
inline void Push(DType x, RType w = 1) {
|
inline void Push(DType x, RType w = 1) {
|
||||||
|
if (w == static_cast<RType>(0)) return;
|
||||||
if (inqueue.qtail == inqueue.queue.size()) {
|
if (inqueue.qtail == inqueue.queue.size()) {
|
||||||
// jump from lazy one value to limit_size * 2
|
// jump from lazy one value to limit_size * 2
|
||||||
if (inqueue.queue.size() == 1) {
|
if (inqueue.queue.size() == 1) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user