Add support for Gamma regression (#1258)

* Add support for Gamma regression

* Use base_score to replace the lp_bias

* Remove the lp_bias config block

* Add a demo for running gamma regression in Python

* Typo fix

* Revise the description for objective

* Add a script to generate the autoclaims dataset
This commit is contained in:
Shengwen Yang
2016-07-07 01:22:46 +08:00
committed by Tianqi Chen
parent f74e2439e0
commit 77d17f6264
5 changed files with 181 additions and 0 deletions

View File

@@ -155,5 +155,87 @@ XGBOOST_REGISTER_METRIC(PossionNegLoglik, "poisson-nloglik")
.describe("Negative loglikelihood for poisson regression.")
.set_body([](const char* param) { return new EvalPoissionNegLogLik(); });
/*!
 * \brief base class of element-wise evaluation
 *  with additional dispersion parameter
 * \tparam Derived the name of subclass; it must provide
 *  EvalRow(label, pred, dispersion) and EvalDispersion(label, pred),
 *  which are invoked statically through a cast of `this` to Derived
 */
template<typename Derived>
struct EvalEWiseBase2 : public Metric {
  /*!
   * \brief evaluate the metric in two passes over the data:
   *  first estimate the dispersion from the weighted sum of
   *  Derived::EvalDispersion, then accumulate the weighted sum of
   *  Derived::EvalRow using that dispersion
   * \param preds prediction values, one per label
   * \param info meta information; labels, weights and num_col are read
   * \param distributed whether to sum the statistics across workers
   * \return Derived::GetFinal applied to the (weighted sum, weight sum) pair
   */
  float Eval(const std::vector<float>& preds,
             const MetaInfo& info,
             bool distributed) const override {
    CHECK_NE(info.labels.size(), 0) << "label set cannot be empty";
    CHECK_EQ(preds.size(), info.labels.size())
        << "label and prediction size not match, "
        << "hint: use merror or mlogloss for multi-class classification";
    const omp_ulong ndata = static_cast<omp_ulong>(info.labels.size());
    // Compute dispersion
    double sum = 0.0, wsum = 0.0;
    // The reduction clause is required: without it every thread updates the
    // shared accumulators concurrently, which is a data race.
    #pragma omp parallel for reduction(+: sum, wsum) schedule(static)
    for (omp_ulong i = 0; i < ndata; ++i) {
      const float wt = info.GetWeight(i);
      sum += static_cast<const Derived*>(this)->EvalDispersion(info.labels[i], preds[i]) * wt;
      wsum += wt;
    }
    double dat[2]; dat[0] = sum, dat[1] = wsum;
    if (distributed) {
      rabit::Allreduce<rabit::op::Sum>(dat, 2);
    }
    // NOTE(review): the denominator is zero or negative when the total weight
    // does not exceed info.num_col; confirm callers never hit that case.
    double dispersion = dat[0] / (dat[1] - info.num_col);
    // Compute metric
    sum = 0.0, wsum = 0.0;
    #pragma omp parallel for reduction(+: sum, wsum) schedule(static)
    for (omp_ulong i = 0; i < ndata; ++i) {
      const float wt = info.GetWeight(i);
      sum += static_cast<const Derived*>(this)->EvalRow(info.labels[i], preds[i], dispersion) * wt;
      wsum += wt;
    }
    dat[0] = sum, dat[1] = wsum;
    if (distributed) {
      rabit::Allreduce<rabit::op::Sum>(dat, 2);
    }
    return Derived::GetFinal(dat[0], dat[1]);
  }
  /*!
   * \brief to be implemented by subclass,
   *   get evaluation result from one row
   * \param label label of current instance
   * \param pred prediction value of current instance
   * \param dispersion dispersion value computed in the first pass of Eval
   */
  inline float EvalRow(float label, float pred, float dispersion) const;
  /*!
   * \brief to be overridden by subclass, final transformation
   * \param esum the sum statistics returned by EvalRow
   * \param wsum sum of weight
   */
  inline static float GetFinal(float esum, float wsum) {
    return esum / wsum;
  }
  /*!
   * \brief to be implemented by subclass,
   *   get the per-row contribution to the dispersion estimate
   * \param label label of current instance
   * \param pred prediction value of current instance
   */
  inline float EvalDispersion(float label, float pred) const;
};
/*!
 * \brief element-wise negative log-likelihood metric for gamma regression;
 *  plugs EvalRow and EvalDispersion into EvalEWiseBase2
 */
struct EvalGammaNegLogLik : public EvalEWiseBase2<EvalGammaNegLogLik> {
  /*! \return the identifier string of this metric */
  const char *Name() const override { return "gamma-nloglik"; }
  /*!
   * \brief negative log-likelihood of a single row
   * \param y label of current instance
   * \param py prediction value of current instance
   * \param psi dispersion value
   * \return -((y * theta - b) / psi + c) with theta = -1 / py,
   *  b = -log(-theta) and
   *  c = (1 / psi) * log(y / psi) - log(y) - LogGamma(1 / psi)
   */
  inline float EvalRow(float y, float py, float psi) const {
    const double inv_psi = 1. / psi;
    const double natural_param = -1. / py;
    const double cumulant = -std::log(-natural_param);
    const double norm_term =
        inv_psi * std::log(y/psi) - std::log(y) - common::LogGamma(inv_psi);
    const double loglik = (y * natural_param - cumulant) / psi + norm_term;
    return -loglik;
  }
  /*!
   * \brief squared relative residual of a single row, (y - py)^2 / py^2
   * \param y label of current instance
   * \param py prediction value of current instance
   */
  inline float EvalDispersion(float y, float py) const {
    const float residual = y - py;
    return (residual * residual) / (py * py);
  }
};
// Register the gamma negative log-likelihood metric under the name
// "gamma-nloglik"; the factory ignores its string argument and always
// returns a freshly allocated EvalGammaNegLogLik.
XGBOOST_REGISTER_METRIC(GammaNegLoglik, "gamma-nloglik")
.describe("Negative loglikelihood for gamma regression.")
.set_body([](const char* param) { return new EvalGammaNegLogLik(); });
} // namespace metric
} // namespace xgboost