Metrics for gamma regression (#1369)
* Add deviance metric for gamma regression * Simplify the computation of nloglik for gamma regression * Add a description for gamma-deviance * Minor fix
This commit is contained in:
parent
c60a356273
commit
7089301b62
@ -1 +1 @@
|
|||||||
Subproject commit c39001019e443c7a061789bd1180f58ce85fc3e6
|
Subproject commit 9fd3b48462a7a651e12a197679f71e043dcb25a2
|
||||||
@ -138,6 +138,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
|
|||||||
- "ndcg@n","map@n": n can be assigned as an integer to cut off the top positions in the lists for evaluation.
|
- "ndcg@n","map@n": n can be assigned as an integer to cut off the top positions in the lists for evaluation.
|
||||||
- "ndcg-","map-","ndcg@n-","map@n-": In XGBoost, NDCG and MAP will evaluate the score of a list without any positive samples as 1. By adding "-" in the evaluation metric XGBoost will evaluate these scores as 0 to be consistent under some conditions.
|
- "ndcg-","map-","ndcg@n-","map@n-": In XGBoost, NDCG and MAP will evaluate the score of a list without any positive samples as 1. By adding "-" in the evaluation metric XGBoost will evaluate these scores as 0 to be consistent under some conditions.
|
||||||
training repeatedly
|
training repeatedly
|
||||||
|
- "gamma-deviance": [residual deviance for gamma regression]
|
||||||
* seed [ default=0 ]
|
* seed [ default=0 ]
|
||||||
- random number seed.
|
- random number seed.
|
||||||
|
|
||||||
|
|||||||
@ -135,6 +135,34 @@ struct EvalPoissionNegLogLik : public EvalEWiseBase<EvalPoissionNegLogLik> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*!
 * \brief element-wise metric "gamma-deviance":
 *  twice the accumulated value of (r - log(r) - 1), where r = label / pred
 */
struct EvalGammaDeviance : public EvalEWiseBase<EvalGammaDeviance> {
  /*! \return the metric name string, "gamma-deviance" */
  const char *Name() const override {
    return "gamma-deviance";
  }
  /*!
   * \brief per-row deviance contribution
   * \param label label of current instance
   * \param pred prediction value of current instance
   * \return r - log(r) - 1 with r = label / (pred + 1e-9)
   */
  inline float EvalRow(float label, float pred) const {
    // The offset only shields the division from pred == 0.
    // NOTE(review): a label of 0 makes r == 0, so std::log(r) is -inf.
    const float kDenomOffset = 1.0e-9;
    const float ratio = label / (pred + kDenomOffset);
    const float log_ratio = std::log(ratio);
    return ratio - log_ratio - 1;
  }
  /*!
   * \brief final transformation of the accumulated statistic
   * \param esum accumulated EvalRow statistic
   * \param wsum accepted for interface compatibility; not used here
   * \return 2 * esum (no normalisation by wsum)
   */
  inline static float GetFinal(float esum, float wsum) {
    return 2 * esum;
  }
};
|
||||||
|
|
||||||
|
/*!
 * \brief element-wise metric "gamma-nloglik": negative log-likelihood of a
 *  gamma model written in exponential-family form, with the dispersion
 *  hard-coded to 1
 */
struct EvalGammaNLogLik: public EvalEWiseBase<EvalGammaNLogLik> {
  /*! \return the metric name string, "gamma-nloglik" */
  const char *Name() const override {
    return "gamma-nloglik";
  }
  /*!
   * \brief per-row negative log-likelihood
   * \param y label of current instance
   * \param py prediction value of current instance
   * \return -((y * theta - b) / a + c) with theta = -1 / py
   */
  inline float EvalRow(float y, float py) const {
    // Fixed dispersion; every term below is expressed through it.
    const float dispersion = 1.0;
    const float natural_param = -1. / py;
    const float scale = dispersion;
    const float cumulant = -std::log(-natural_param);
    // Normalising term; it depends on y and the dispersion but not on py.
    const float normaliser = 1. / dispersion * std::log(y / dispersion)
        - std::log(y)
        - common::LogGamma(1. / dispersion);
    return -((y * natural_param - cumulant) / scale + normaliser);
  }
};
|
||||||
|
|
||||||
XGBOOST_REGISTER_METRIC(RMSE, "rmse")
|
XGBOOST_REGISTER_METRIC(RMSE, "rmse")
|
||||||
.describe("Rooted mean square error.")
|
.describe("Rooted mean square error.")
|
||||||
.set_body([](const char* param) { return new EvalRMSE(); });
|
.set_body([](const char* param) { return new EvalRMSE(); });
|
||||||
@ -155,87 +183,13 @@ XGBOOST_REGISTER_METRIC(PossionNegLoglik, "poisson-nloglik")
|
|||||||
.describe("Negative loglikelihood for poisson regression.")
|
.describe("Negative loglikelihood for poisson regression.")
|
||||||
.set_body([](const char* param) { return new EvalPoissionNegLogLik(); });
|
.set_body([](const char* param) { return new EvalPoissionNegLogLik(); });
|
||||||
|
|
||||||
/*!
|
XGBOOST_REGISTER_METRIC(GammaDeviance, "gamma-deviance")
|
||||||
* \brief base class of element-wise evaluation
|
.describe("Residual deviance for gamma regression.")
|
||||||
* with additonal dispersion parameter
|
.set_body([](const char* param) { return new EvalGammaDeviance(); });
|
||||||
* \tparam Derived the name of subclass
|
|
||||||
*/
|
|
||||||
template<typename Derived>
|
|
||||||
struct EvalEWiseBase2 : public Metric {
|
|
||||||
float Eval(const std::vector<float>& preds,
|
|
||||||
const MetaInfo& info,
|
|
||||||
bool distributed) const override {
|
|
||||||
CHECK_NE(info.labels.size(), 0) << "label set cannot be empty";
|
|
||||||
CHECK_EQ(preds.size(), info.labels.size())
|
|
||||||
<< "label and prediction size not match, "
|
|
||||||
<< "hint: use merror or mlogloss for multi-class classification";
|
|
||||||
const omp_ulong ndata = static_cast<omp_ulong>(info.labels.size());
|
|
||||||
|
|
||||||
// Computer dispersion
|
XGBOOST_REGISTER_METRIC(GammaNLogLik, "gamma-nloglik")
|
||||||
double sum = 0.0, wsum = 0.0;
|
.describe("Negative log-likelihood for gamma regression.")
|
||||||
#pragma omp parallel for schedule(static)
|
.set_body([](const char* param) { return new EvalGammaNLogLik(); });
|
||||||
for (omp_ulong i = 0; i < ndata; ++i) {
|
|
||||||
const float wt = info.GetWeight(i);
|
|
||||||
sum += static_cast<const Derived*>(this)->EvalDispersion(info.labels[i], preds[i]) * wt;
|
|
||||||
wsum += wt;
|
|
||||||
}
|
|
||||||
double dat[2]; dat[0] = sum, dat[1] = wsum;
|
|
||||||
if (distributed) {
|
|
||||||
rabit::Allreduce<rabit::op::Sum>(dat, 2);
|
|
||||||
}
|
|
||||||
double dispersion = dat[0] / (dat[1] - info.num_col);
|
|
||||||
|
|
||||||
// Computer metric
|
|
||||||
sum = 0.0, wsum = 0.0;
|
|
||||||
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
|
|
||||||
for (omp_ulong i = 0; i < ndata; ++i) {
|
|
||||||
const float wt = info.GetWeight(i);
|
|
||||||
sum += static_cast<const Derived*>(this)->EvalRow(info.labels[i], preds[i], dispersion) * wt;
|
|
||||||
wsum += wt;
|
|
||||||
}
|
|
||||||
dat[0] = sum, dat[1] = wsum;
|
|
||||||
if (distributed) {
|
|
||||||
rabit::Allreduce<rabit::op::Sum>(dat, 2);
|
|
||||||
}
|
|
||||||
return Derived::GetFinal(dat[0], dat[1]);
|
|
||||||
}
|
|
||||||
/*!
|
|
||||||
* \brief to be implemented by subclass,
|
|
||||||
* get evaluation result from one row
|
|
||||||
* \param label label of current instance
|
|
||||||
* \param pred prediction value of current instance
|
|
||||||
*/
|
|
||||||
inline float EvalRow(float label, float pred, float dispersion) const;
|
|
||||||
/*!
|
|
||||||
* \brief to be overridden by subclass, final transformation
|
|
||||||
* \param esum the sum statistics returned by EvalRow
|
|
||||||
* \param wsum sum of weight
|
|
||||||
*/
|
|
||||||
inline static float GetFinal(float esum, float wsum) {
|
|
||||||
return esum / wsum;
|
|
||||||
}
|
|
||||||
inline float EvalDispersion(float label, float pred) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
/*!
 * \brief element-wise metric "gamma-nloglik" that takes the dispersion as a
 *  per-call argument; also supplies the per-row dispersion statistic
 */
struct EvalGammaNegLogLik : public EvalEWiseBase2<EvalGammaNegLogLik> {
  /*! \return the metric name string, "gamma-nloglik" */
  const char *Name() const override {
    return "gamma-nloglik";
  }
  /*!
   * \brief per-row negative log-likelihood in exponential-family form
   * \param y label of current instance
   * \param py prediction value of current instance
   * \param psi dispersion parameter
   * \return -((y * theta - b) / a + c) with theta = -1 / py and a = psi
   */
  inline float EvalRow(float y, float py, float psi) const {
    // Intermediate terms are kept in double; only the result is narrowed.
    const double natural_param = -1. / py;
    const double scale = psi;
    const double cumulant = -std::log(-natural_param);
    const double normaliser = 1. / psi * std::log(y / psi)
        - std::log(y)
        - common::LogGamma(1. / psi);
    return -((y * natural_param - cumulant) / scale + normaliser);
  }
  /*!
   * \brief per-row dispersion statistic
   * \param y label of current instance
   * \param py prediction value of current instance
   * \return (y - py)^2 / py^2
   */
  inline float EvalDispersion(float y, float py) const {
    const float residual = y - py;
    return (residual * residual) / (py * py);
  }
};
|
|
||||||
|
|
||||||
// Metric registry entry: the name "gamma-nloglik", a description string, and
// a factory lambda that ignores `param` and returns a new EvalGammaNegLogLik.
XGBOOST_REGISTER_METRIC(GammaNegLoglik, "gamma-nloglik")
|
|
||||||
.describe("Negative loglikelihood for gamma regression.")
|
|
||||||
.set_body([](const char* param) { return new EvalGammaNegLogLik(); });
|
|
||||||
|
|
||||||
} // namespace metric
|
} // namespace metric
|
||||||
} // namespace xgboost
|
} // namespace xgboost
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user