Metrics for gamma regression (#1369)

* Add deviance metric for gamma regression * Simplify the computation of nloglik for gamma regression * Add a description for gamma-deviance * Minor fix
2016-07-18 22:10:44 +08:00
parent c60a356273
commit 7089301b62
3 changed files with 36 additions and 81 deletions
--- a/2
+++ b/2
--- a/doc/parameter.md
+++ b/doc/parameter.md
@@ -138,6 +138,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
  - "ndcg@n","map@n": n can be assigned as an integer to cut off the top positions in the lists for evaluation.
  - "ndcg-","map-","ndcg@n-","map@n-": In XGBoost, NDCG and MAP will evaluate the score of a list without any positive samples as 1. By adding "-" in the evaluation metric XGBoost will evaluate these score as 0 to be consistent under some conditions.
 training repeatedly
  - "gamma-deviance": [residual deviance for gamma regression]
 * seed [ default=0 ]
 - random number seed.
--- a/src/metric/elementwise_metric.cc
+++ b/src/metric/elementwise_metric.cc
@@ -135,6 +135,34 @@ struct EvalPoissionNegLogLik : public EvalEWiseBase<EvalPoissionNegLogLik> {
  }
 };
 /*!
  * \brief element-wise metric reported as "gamma-deviance".
  *  Each row contributes r - log(r) - 1 with r = label / (pred + 1e-9);
  *  the final value is twice the accumulated row sum.
  */
 struct EvalGammaDeviance : public EvalEWiseBase<EvalGammaDeviance> {
  /*! \brief name of the metric */
  const char *Name() const override { return "gamma-deviance"; }
  /*!
   * \brief per-row deviance term
   * \param label label of current instance
   * \param pred prediction value of current instance
   */
  inline float EvalRow(float label, float pred) const {
    // small offset added to the prediction before dividing
    // (presumably to avoid dividing by an exact zero -- TODO confirm)
    const float kOffset = 1.0e-9;
    const float ratio = label / (pred + kOffset);
    return ratio - std::log(ratio) - 1;
  }
  /*!
   * \brief final transformation: doubles the accumulated sum
   * \param esum the sum statistics returned by EvalRow
   * \param wsum sum of weight (not used by this metric)
   */
  inline static float GetFinal(float esum, float wsum) {
    return 2 * esum;
  }
 };
 /*!
  * \brief element-wise metric reported as "gamma-nloglik".
  *  The per-row value is written as -((y * theta - b) / a + c)
  *  with the dispersion fixed to 1.
  */
 struct EvalGammaNLogLik: public EvalEWiseBase<EvalGammaNLogLik> {
  /*! \brief name of the metric */
  const char *Name() const override { return "gamma-nloglik"; }
  /*!
   * \brief negative log-likelihood term of one row
   * \param y label of current instance
   * \param py prediction value of current instance
   */
  inline float EvalRow(float y, float py) const {
    // dispersion is hard-coded to one in this metric
    const float dispersion = 1.0;
    // -1 / py; the double literal makes the division run in double
    // before the result is narrowed to float
    const float neg_inv_pred = -1. / py;
    const float scale = dispersion;
    const float neg_log_term = -std::log(-neg_inv_pred);
    // term that depends only on the label and the dispersion
    const float label_term =
        1. / dispersion * std::log(y/dispersion) - std::log(y) - common::LogGamma(1. / dispersion);
    return -((y * neg_inv_pred - neg_log_term) / scale + label_term);
  }
 };
 // Registration entry for the name "rmse": the factory lambda does not use
 // `param` and returns a newly allocated EvalRMSE.
 XGBOOST_REGISTER_METRIC(RMSE, "rmse")
 .describe("Rooted mean square error.")
 .set_body([](const char* param) { return new EvalRMSE(); });
@@ -155,87 +183,13 @@ XGBOOST_REGISTER_METRIC(PossionNegLoglik, "poisson-nloglik")
 .describe("Negative loglikelihood for poisson regression.")
 .set_body([](const char* param) { return new EvalPoissionNegLogLik(); });
-/*!
+XGBOOST_REGISTER_METRIC(GammaDeviance, "gamma-deviance")
- * \brief base class of element-wise evaluation
+.describe("Residual deviance for gamma regression.")
- *      with additional dispersion parameter
+.set_body([](const char* param) { return new EvalGammaDeviance(); });
 * \tparam Derived the name of subclass
 */
 template<typename Derived>
 struct EvalEWiseBase2 : public Metric {
  /*!
   * \brief two-pass evaluation.
   *  Pass 1 accumulates the weighted Derived::EvalDispersion over all rows and
   *  derives a single dispersion value from it; pass 2 accumulates the weighted
   *  Derived::EvalRow (which receives that dispersion) and hands the totals to
   *  Derived::GetFinal. Both accumulations are summed with rabit::Allreduce
   *  when `distributed` is true.
   * \param preds predictions, must have the same size as info.labels
   * \param info meta information providing labels, weights and num_col
   * \param distributed whether to all-reduce the partial sums
   * \return the value produced by Derived::GetFinal
   */
  float Eval(const std::vector<float>& preds,
             const MetaInfo& info,
             bool distributed) const override {
    CHECK_NE(info.labels.size(), 0) << "label set cannot be empty";
    CHECK_EQ(preds.size(), info.labels.size())
        << "label and prediction size not match, "
        << "hint: use merror or mlogloss for multi-class classification";
    const omp_ulong ndata = static_cast<omp_ulong>(info.labels.size());
-    // Compute dispersion
+XGBOOST_REGISTER_METRIC(GammaNLogLik, "gamma-nloglik")
-    double sum = 0.0, wsum = 0.0;
+.describe("Negative log-likelihood for gamma regression.")
-    #pragma omp parallel for schedule(static)
+.set_body([](const char* param) { return new EvalGammaNLogLik(); });
    // NOTE(review): the pragma for this first loop carries no reduction clause,
    // unlike the second loop below, yet every iteration updates sum and wsum --
    // confirm whether this is an unsynchronized update.
    for (omp_ulong i = 0; i < ndata; ++i) {
      const float wt = info.GetWeight(i);
      sum += static_cast<const Derived*>(this)->EvalDispersion(info.labels[i], preds[i]) * wt;
      wsum += wt;
    }
    double dat[2]; dat[0] = sum, dat[1] = wsum;
    if (distributed) {
      rabit::Allreduce<rabit::op::Sum>(dat, 2);
    }
    // weighted dispersion sum divided by (total weight - number of columns)
    double dispersion = dat[0] / (dat[1] - info.num_col);
    // Compute metric
    sum = 0.0, wsum = 0.0;
    #pragma omp parallel for reduction(+: sum, wsum) schedule(static)
    for (omp_ulong i = 0; i < ndata; ++i) {
      const float wt = info.GetWeight(i);
      sum += static_cast<const Derived*>(this)->EvalRow(info.labels[i], preds[i], dispersion) * wt;
      wsum += wt;
    }
    dat[0] = sum, dat[1] = wsum;
    if (distributed) {
      rabit::Allreduce<rabit::op::Sum>(dat, 2);
    }
    return Derived::GetFinal(dat[0], dat[1]);
  }
  /*!
   * \brief to be implemented by subclass,
   *   get evaluation result from one row
   * \param label label of current instance
   * \param pred prediction value of current instance
   * \param dispersion dispersion value computed in Eval from EvalDispersion
   */
  inline float EvalRow(float label, float pred, float dispersion) const;
  /*!
   * \brief to be overridden by subclass, final transformation
   * \param esum the sum statistics returned by EvalRow
   * \param wsum sum of weight
   */
  inline static float GetFinal(float esum, float wsum) {
    return esum / wsum;
  }
  /*!
   * \brief to be implemented by subclass,
   *   per-row contribution to the dispersion sum accumulated in Eval
   * \param label label of current instance
   * \param pred prediction value of current instance
   */
  inline float EvalDispersion(float label, float pred) const;
 };
 /*!
  * \brief metric reported as "gamma-nloglik" that receives the dispersion
  *  as an extra argument of EvalRow and supplies EvalDispersion.
  */
 struct EvalGammaNegLogLik : public EvalEWiseBase2<EvalGammaNegLogLik> {
  /*! \brief name of the metric */
  const char *Name() const override { return "gamma-nloglik"; }
  /*!
   * \brief negative log-likelihood term of one row,
   *   written as -((y * theta - b) / a + c)
   * \param y label of current instance
   * \param py prediction value of current instance
   * \param psi dispersion value
   */
  inline float EvalRow(float y, float py, float psi) const {
    // intermediate terms are kept in double and narrowed to float on return
    const double neg_inv_pred = -1. / py;
    const double scale = psi;
    const double neg_log_term = -std::log(-neg_inv_pred);
    // term that depends only on the label and the dispersion
    const double label_term =
        1. / psi * std::log(y/psi) - std::log(y) - common::LogGamma(1. / psi);
    return -((y * neg_inv_pred - neg_log_term) / scale + label_term);
  }
  /*!
   * \brief squared difference between label and prediction,
   *   divided by the squared prediction
   * \param y label of current instance
   * \param py prediction value of current instance
   */
  inline float EvalDispersion(float y, float py) const {
    const float diff = y - py;
    return (diff * diff) / (py * py);
  }
 };
 // Registration entry for the name "gamma-nloglik": the factory lambda does
 // not use `param` and returns a newly allocated EvalGammaNegLogLik.
 XGBOOST_REGISTER_METRIC(GammaNegLoglik, "gamma-nloglik")
 .describe("Negative loglikelihood for gamma regression.")
 .set_body([](const char* param) { return new EvalGammaNegLogLik(); });
 }  // namespace metric
 }  // namespace xgboost