diff --git a/dmlc-core b/dmlc-core
index c39001019..9fd3b4846 160000
--- a/dmlc-core
+++ b/dmlc-core
@@ -1 +1 @@
-Subproject commit c39001019e443c7a061789bd1180f58ce85fc3e6
+Subproject commit 9fd3b48462a7a651e12a197679f71e043dcb25a2
diff --git a/doc/parameter.md b/doc/parameter.md
index 0c1c989d3..a549c25c9 100644
--- a/doc/parameter.md
+++ b/doc/parameter.md
@@ -138,6 +138,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
   - "ndcg@n","map@n": n can be assigned as an integer to cut off the top positions in the lists for evaluation.
   - "ndcg-","map-","ndcg@n-","map@n-": In XGBoost, NDCG and MAP will evaluate the score of a list without any positive samples as 1. By adding "-" in the evaluation metric XGBoost will evaluate these score as 0 to be consistent under some conditions. training repeatively
+  - "gamma-deviance": [residual deviance for gamma regression]
 
 * seed [ default=0 ]
   - random number seed.
diff --git a/src/metric/elementwise_metric.cc b/src/metric/elementwise_metric.cc
index db2869efb..6e087ba26 100644
--- a/src/metric/elementwise_metric.cc
+++ b/src/metric/elementwise_metric.cc
@@ -135,6 +135,34 @@ struct EvalPoissionNegLogLik : public EvalEWiseBase<EvalPoissionNegLogLik> {
   }
 };
 
+struct EvalGammaDeviance : public EvalEWiseBase<EvalGammaDeviance> {
+  const char *Name() const override {
+    return "gamma-deviance";
+  }
+  inline float EvalRow(float label, float pred) const {
+    float epsilon = 1.0e-9;
+    float tmp = label / (pred + epsilon);
+    return tmp - std::log(tmp) - 1;
+  }
+  inline static float GetFinal(float esum, float wsum) {
+    return 2 * esum;
+  }
+};
+
+struct EvalGammaNLogLik: public EvalEWiseBase<EvalGammaNLogLik> {
+  const char *Name() const override {
+    return "gamma-nloglik";
+  }
+  inline float EvalRow(float y, float py) const {
+    float psi = 1.0;
+    float theta = -1. / py;
+    float a = psi;
+    float b = -std::log(-theta);
+    float c = 1. / psi * std::log(y/psi) - std::log(y) - common::LogGamma(1. / psi);
+    return -((y * theta - b) / a + c);
+  }
+};
+
 XGBOOST_REGISTER_METRIC(RMSE, "rmse")
 .describe("Rooted mean square error.")
 .set_body([](const char* param) { return new EvalRMSE(); });
@@ -155,87 +183,13 @@ XGBOOST_REGISTER_METRIC(PossionNegLoglik, "poisson-nloglik")
 .describe("Negative loglikelihood for poisson regression.")
 .set_body([](const char* param) { return new EvalPoissionNegLogLik(); });
 
-/*!
- * \brief base class of element-wise evaluation
- *   with additonal dispersion parameter
- * \tparam Derived the name of subclass
- */
-template<typename Derived>
-struct EvalEWiseBase2 : public Metric {
-  float Eval(const std::vector<float>& preds,
-             const MetaInfo& info,
-             bool distributed) const override {
-    CHECK_NE(info.labels.size(), 0) << "label set cannot be empty";
-    CHECK_EQ(preds.size(), info.labels.size())
-        << "label and prediction size not match, "
-        << "hint: use merror or mlogloss for multi-class classification";
-    const omp_ulong ndata = static_cast<omp_ulong>(info.labels.size());
+XGBOOST_REGISTER_METRIC(GammaDeviance, "gamma-deviance")
+.describe("Residual deviance for gamma regression.")
+.set_body([](const char* param) { return new EvalGammaDeviance(); });
 
-    // Computer dispersion
-    double sum = 0.0, wsum = 0.0;
-    #pragma omp parallel for schedule(static)
-    for (omp_ulong i = 0; i < ndata; ++i) {
-      const float wt = info.GetWeight(i);
-      sum += static_cast<const Derived*>(this)->EvalDispersion(info.labels[i], preds[i]) * wt;
-      wsum += wt;
-    }
-    double dat[2]; dat[0] = sum, dat[1] = wsum;
-    if (distributed) {
-      rabit::Allreduce<rabit::op::Sum>(dat, 2);
-    }
-    double dispersion = dat[0] / (dat[1] - info.num_col);
-
-    // Computer metric
-    sum = 0.0, wsum = 0.0;
-    #pragma omp parallel for reduction(+: sum, wsum) schedule(static)
-    for (omp_ulong i = 0; i < ndata; ++i) {
-      const float wt = info.GetWeight(i);
-      sum += static_cast<const Derived*>(this)->EvalRow(info.labels[i], preds[i], dispersion) * wt;
-      wsum += wt;
-    }
-    dat[0] = sum, dat[1] = wsum;
-    if (distributed) {
-      rabit::Allreduce<rabit::op::Sum>(dat, 2);
-    }
-    return Derived::GetFinal(dat[0], dat[1]);
-  }
-  /*!
-   * \brief to be implemented by subclass,
-   *   get evaluation result from one row
-   * \param label label of current instance
-   * \param pred prediction value of current instance
-   */
-  inline float EvalRow(float label, float pred, float dispersion) const;
-  /*!
-   * \brief to be overridden by subclass, final transformation
-   * \param esum the sum statistics returned by EvalRow
-   * \param wsum sum of weight
-   */
-  inline static float GetFinal(float esum, float wsum) {
-    return esum / wsum;
-  }
-  inline float EvalDispersion(float label, float pred) const;
-};
-
-struct EvalGammaNegLogLik : public EvalEWiseBase2<EvalGammaNegLogLik> {
-  const char *Name() const override {
-    return "gamma-nloglik";
-  }
-  inline float EvalRow(float y, float py, float psi) const {
-    double theta = -1. / py;
-    double a = psi;
-    double b = -std::log(-theta);
-    double c = 1. / psi * std::log(y/psi) - std::log(y) - common::LogGamma(1. / psi);
-    return -((y * theta - b) / a + c);
-  }
-  inline float EvalDispersion(float y, float py) const {
-    return ((y - py) * (y - py)) / (py * py);
-  }
-};
-
-XGBOOST_REGISTER_METRIC(GammaNegLoglik, "gamma-nloglik")
-.describe("Negative loglikelihood for gamma regression.")
-.set_body([](const char* param) { return new EvalGammaNegLogLik(); });
+XGBOOST_REGISTER_METRIC(GammaNLogLik, "gamma-nloglik")
+.describe("Negative log-likelihood for gamma regression.")
+.set_body([](const char* param) { return new EvalGammaNLogLik(); });
 } // namespace metric
 } // namespace xgboost
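Note (not part of the patch): a minimal standalone C++ sketch of what the two metrics compute after this change. The GammaDeviance/GammaNLogLik helper names are hypothetical, and unit instance weights are assumed; inside XGBoost, EvalEWiseBase multiplies each row's term by its instance weight before summing.

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Mirrors EvalGammaDeviance: per-row term y/p - log(y/p) - 1,
// summed and then doubled by GetFinal (which ignores wsum).
float GammaDeviance(const std::vector<float>& y, const std::vector<float>& p) {
  const float epsilon = 1.0e-9f;  // same guard against division by zero as EvalRow
  float esum = 0.0f;
  for (std::size_t i = 0; i < y.size(); ++i) {
    float tmp = y[i] / (p[i] + epsilon);
    esum += tmp - std::log(tmp) - 1.0f;  // per-row contribution from EvalRow
  }
  return 2.0f * esum;  // GetFinal: 2 * esum
}

// Mirrors EvalGammaNLogLik with dispersion psi fixed at 1, where the
// per-row negative log-likelihood reduces algebraically to y/p + log(p).
float GammaNLogLik(const std::vector<float>& y, const std::vector<float>& p) {
  float esum = 0.0f, wsum = 0.0f;
  for (std::size_t i = 0; i < y.size(); ++i) {
    esum += y[i] / p[i] + std::log(p[i]);
    wsum += 1.0f;  // unit weight per row (assumption for this sketch)
  }
  return esum / wsum;  // base-class default GetFinal: weighted mean
}

int main() {
  std::vector<float> y = {1.0f, 2.0f, 3.0f};
  std::vector<float> p = {1.1f, 1.9f, 3.2f};
  std::printf("gamma-deviance = %f\n", GammaDeviance(y, p));
  std::printf("gamma-nloglik  = %f\n", GammaNLogLik(y, p));
  return 0;
}

With psi fixed at 1, the b and c terms in EvalRow collapse (b = log(py), c = 0), which is why the rewritten gamma-nloglik no longer needs the dispersion-estimating EvalEWiseBase2 base class that this patch removes and can sit on the plain EvalEWiseBase like the other element-wise metrics.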