Add support for Gamma regression (#1258)
* Add support for Gamma regression * Use base_score to replace the lp_bias * Remove the lp_bias config block * Add a demo for running gamma regression in Python * Typo fix * Revise the description for objective * Add a script to generate the autoclaims dataset
This commit is contained in:
committed by
Tianqi Chen
parent
f74e2439e0
commit
77d17f6264
@@ -155,5 +155,87 @@ XGBOOST_REGISTER_METRIC(PossionNegLoglik, "poisson-nloglik")
|
||||
.describe("Negative loglikelihood for poisson regression.")
|
||||
.set_body([](const char* param) { return new EvalPoissionNegLogLik(); });
|
||||
|
||||
/*!
|
||||
* \brief base class of element-wise evaluation
|
||||
* with additonal dispersion parameter
|
||||
* \tparam Derived the name of subclass
|
||||
*/
|
||||
template<typename Derived>
|
||||
struct EvalEWiseBase2 : public Metric {
|
||||
float Eval(const std::vector<float>& preds,
|
||||
const MetaInfo& info,
|
||||
bool distributed) const override {
|
||||
CHECK_NE(info.labels.size(), 0) << "label set cannot be empty";
|
||||
CHECK_EQ(preds.size(), info.labels.size())
|
||||
<< "label and prediction size not match, "
|
||||
<< "hint: use merror or mlogloss for multi-class classification";
|
||||
const omp_ulong ndata = static_cast<omp_ulong>(info.labels.size());
|
||||
|
||||
// Computer dispersion
|
||||
double sum = 0.0, wsum = 0.0;
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (omp_ulong i = 0; i < ndata; ++i) {
|
||||
const float wt = info.GetWeight(i);
|
||||
sum += static_cast<const Derived*>(this)->EvalDispersion(info.labels[i], preds[i]) * wt;
|
||||
wsum += wt;
|
||||
}
|
||||
double dat[2]; dat[0] = sum, dat[1] = wsum;
|
||||
if (distributed) {
|
||||
rabit::Allreduce<rabit::op::Sum>(dat, 2);
|
||||
}
|
||||
double dispersion = dat[0] / (dat[1] - info.num_col);
|
||||
|
||||
// Computer metric
|
||||
sum = 0.0, wsum = 0.0;
|
||||
#pragma omp parallel for reduction(+: sum, wsum) schedule(static)
|
||||
for (omp_ulong i = 0; i < ndata; ++i) {
|
||||
const float wt = info.GetWeight(i);
|
||||
sum += static_cast<const Derived*>(this)->EvalRow(info.labels[i], preds[i], dispersion) * wt;
|
||||
wsum += wt;
|
||||
}
|
||||
dat[0] = sum, dat[1] = wsum;
|
||||
if (distributed) {
|
||||
rabit::Allreduce<rabit::op::Sum>(dat, 2);
|
||||
}
|
||||
return Derived::GetFinal(dat[0], dat[1]);
|
||||
}
|
||||
/*!
|
||||
* \brief to be implemented by subclass,
|
||||
* get evaluation result from one row
|
||||
* \param label label of current instance
|
||||
* \param pred prediction value of current instance
|
||||
*/
|
||||
inline float EvalRow(float label, float pred, float dispersion) const;
|
||||
/*!
|
||||
* \brief to be overridden by subclass, final transformation
|
||||
* \param esum the sum statistics returned by EvalRow
|
||||
* \param wsum sum of weight
|
||||
*/
|
||||
inline static float GetFinal(float esum, float wsum) {
|
||||
return esum / wsum;
|
||||
}
|
||||
inline float EvalDispersion(float label, float pred) const;
|
||||
};
|
||||
|
||||
/*!
 * \brief negative log-likelihood metric registered as "gamma-nloglik";
 *  supplies the per-row terms consumed by EvalEWiseBase2
 */
struct EvalGammaNegLogLik : public EvalEWiseBase2<EvalGammaNegLogLik> {
  const char *Name() const override { return "gamma-nloglik"; }

  /*!
   * \brief negative log-likelihood contribution of a single row
   * \param y label of current instance
   * \param py prediction value of current instance
   * \param psi dispersion value
   */
  inline float EvalRow(float y, float py, float psi) const {
    const double natural_param = -1. / py;
    const double scale = psi;
    const double cumulant = -std::log(-natural_param);
    const double inv_psi = 1. / psi;
    const double norm_term =
        inv_psi * std::log(y/psi) - std::log(y) - common::LogGamma(inv_psi);
    const double loglik = (y * natural_param - cumulant) / scale + norm_term;
    return -loglik;
  }

  /*!
   * \brief squared relative residual of a single row
   * \param y label of current instance
   * \param py prediction value of current instance
   */
  inline float EvalDispersion(float y, float py) const {
    const float residual = y - py;
    return (residual * residual) / (py * py);
  }
};
|
||||
|
||||
// expose EvalGammaNegLogLik under the metric name "gamma-nloglik"
XGBOOST_REGISTER_METRIC(GammaNegLoglik, "gamma-nloglik")
.describe("Negative loglikelihood for gamma regression.")
.set_body([](const char*) -> Metric* {
    return new EvalGammaNegLogLik();
  });
|
||||
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -217,5 +217,60 @@ DMLC_REGISTER_PARAMETER(PoissonRegressionParam);
|
||||
// expose PoissonRegression under the objective name "count:poisson"
XGBOOST_REGISTER_OBJECTIVE(PoissonRegression, "count:poisson")
.describe("Possion regression for count data.")
.set_body([] {
    return new PoissonRegression();
  });
|
||||
|
||||
// gamma regression
|
||||
class GammaRegression : public ObjFunction {
|
||||
public:
|
||||
// declare functions
|
||||
void Configure(const std::vector<std::pair<std::string, std::string> >& args) override {
|
||||
}
|
||||
|
||||
void GetGradient(const std::vector<float> &preds,
|
||||
const MetaInfo &info,
|
||||
int iter,
|
||||
std::vector<bst_gpair> *out_gpair) override {
|
||||
CHECK_NE(info.labels.size(), 0) << "label set cannot be empty";
|
||||
CHECK_EQ(preds.size(), info.labels.size()) << "labels are not correctly provided";
|
||||
out_gpair->resize(preds.size());
|
||||
// check if label in range
|
||||
bool label_correct = true;
|
||||
// start calculating gradient
|
||||
const omp_ulong ndata = static_cast<omp_ulong>(preds.size()); // NOLINT(*)
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (omp_ulong i = 0; i < ndata; ++i) { // NOLINT(*)
|
||||
float p = preds[i];
|
||||
float w = info.GetWeight(i);
|
||||
float y = info.labels[i];
|
||||
if (y >= 0.0f) {
|
||||
out_gpair->at(i) = bst_gpair((1 - y / std::exp(p)) * w, y / std::exp(p) * w);
|
||||
} else {
|
||||
label_correct = false;
|
||||
}
|
||||
}
|
||||
CHECK(label_correct) << "GammaRegression: label must be positive";
|
||||
}
|
||||
void PredTransform(std::vector<float> *io_preds) override {
|
||||
std::vector<float> &preds = *io_preds;
|
||||
const long ndata = static_cast<long>(preds.size()); // NOLINT(*)
|
||||
#pragma omp parallel for schedule(static)
|
||||
for (long j = 0; j < ndata; ++j) { // NOLINT(*)
|
||||
preds[j] = std::exp(preds[j]);
|
||||
}
|
||||
}
|
||||
void EvalTransform(std::vector<float> *io_preds) override {
|
||||
PredTransform(io_preds);
|
||||
}
|
||||
float ProbToMargin(float base_score) const override {
|
||||
return std::log(base_score);
|
||||
}
|
||||
const char* DefaultEvalMetric(void) const override {
|
||||
return "gamma-nloglik";
|
||||
}
|
||||
};
|
||||
|
||||
// register the ojective functions
|
||||
XGBOOST_REGISTER_OBJECTIVE(GammaRegression, "reg:gamma")
|
||||
.describe("Gamma regression for severity data.")
|
||||
.set_body([]() { return new GammaRegression(); });
|
||||
} // namespace obj
|
||||
} // namespace xgboost
|
||||
|
||||
Reference in New Issue
Block a user