Add rmsle metric and reg:squaredlogerror objective (#4541)

parent 9683fd433e
commit 2f1319f273
@@ -151,7 +151,7 @@ Parameters for Tree Booster
   - ``refresh``: refreshes tree's statistics and/or leaf values based on the current data. Note that no random subsampling of data rows is performed.
   - ``prune``: prunes the splits where loss < min_split_loss (or gamma).

-  In a distributed setting, the implicit updater sequence value would be adjusted to ``grow_histmaker,prune`` by default, and you can set ``tree_method`` as ``hist`` to use ``grow_histmaker``.
+  In a distributed setting, the implicit updater sequence value would be adjusted to ``grow_histmaker,prune`` by default, and you can set ``tree_method`` as ``hist`` to use ``grow_histmaker``.

 * ``refresh_leaf`` [default=1]

@@ -295,6 +295,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
 * ``objective`` [default=reg:squarederror]

   - ``reg:squarederror``: regression with squared loss
+  - ``reg:squaredlogerror``: regression with squared log loss :math:`\frac{1}{2}[\log(pred + 1) - \log(label + 1)]^2`. All input labels are required to be greater than -1. Also, see metric ``rmsle`` for possible issues with this objective.
   - ``reg:logistic``: logistic regression
   - ``binary:logistic``: logistic regression for binary classification, output probability
   - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation
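As a quick sanity check on the loss definition above, here is a minimal standalone sketch (plain C++, not part of this commit; ``SquaredLogLoss`` is a hypothetical helper name) that evaluates the per-instance squared log loss and shows why inputs must stay above -1:

#include <cmath>
#include <cstdio>

// Per-instance squared log loss: 0.5 * (log(pred + 1) - log(label + 1))^2
float SquaredLogLoss(float pred, float label) {
  float diff = std::log1p(pred) - std::log1p(label);
  return 0.5f * diff * diff;
}

int main() {
  std::printf("%f\n", SquaredLogLoss(0.1f, 1.0f));   // finite: both inputs > -1
  std::printf("%f\n", SquaredLogLoss(-1.5f, 1.0f));  // nan: log1p is undefined below -1
}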
@@ -325,6 +326,7 @@ Specify the learning task and the corresponding learning objective. The objectiv
   - The choices are listed below:

     - ``rmse``: `root mean square error <http://en.wikipedia.org/wiki/Root_mean_square_error>`_
+    - ``rmsle``: root mean square log error: :math:`\sqrt{\frac{1}{N}\sum_{i=1}^{N}[\log(pred_i + 1) - \log(label_i + 1)]^2}`. Default metric of the ``reg:squaredlogerror`` objective. This metric reduces errors generated by outliers in the dataset, but since the ``log`` function is employed, ``rmsle`` might output ``nan`` when a prediction value is less than -1. See ``reg:squaredlogerror`` for other requirements.
     - ``mae``: `mean absolute error <https://en.wikipedia.org/wiki/Mean_absolute_error>`_
     - ``logloss``: `negative log-likelihood <http://en.wikipedia.org/wiki/Log-likelihood>`_
     - ``error``: Binary classification error rate. It is calculated as ``#(wrong cases)/#(all cases)``. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances.
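The metric is just the square root of the mean squared log error over the N rows. A minimal standalone sketch (not part of this commit) that applies the formula above to five predictions against unit labels, reproducing the expectation used by the metric unit test further down in this diff:

#include <cmath>
#include <cstdio>

int main() {
  const float pred[]  = {0.1f, 0.2f, 0.4f, 0.8f, 1.6f};
  const float label[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
  const int n = 5;
  double sum = 0.0;
  for (int i = 0; i < n; ++i) {
    double diff = std::log1p(pred[i]) - std::log1p(label[i]);
    sum += diff * diff;  // accumulate squared log errors
  }
  // ~0.40633, within the test's 1e-4 tolerance of 0.40632
  std::printf("rmsle = %.5f\n", std::sqrt(sum / n));
}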
@@ -24,8 +24,9 @@ private[spark] trait LearningTaskParams extends Params {

   /**
    * Specify the learning task and the corresponding learning objective.
-   * options: reg:squarederror, reg:logistic, binary:logistic, binary:logitraw, count:poisson,
-   * multi:softmax, multi:softprob, rank:pairwise, reg:gamma. default: reg:squarederror
+   * options: reg:squarederror, reg:squaredlogerror, reg:logistic, binary:logistic, binary:logitraw,
+   * count:poisson, multi:softmax, multi:softprob, rank:pairwise, reg:gamma.
+   * default: reg:squarederror
    */
   final val objective = new Param[String](this, "objective", "objective function used for " +
     s"training, options: {${LearningTaskParams.supportedObjective.mkString(",")}}",
@@ -56,7 +57,7 @@ private[spark] trait LearningTaskParams extends Params {
   /**
    * evaluation metrics for validation data, a default metric will be assigned according to
    * objective (rmse for regression, error for classification, and mean average precision for
-   * ranking). options: rmse, mae, logloss, error, merror, mlogloss, auc, aucpr, ndcg, map,
+   * ranking). options: rmse, rmsle, mae, logloss, error, merror, mlogloss, auc, aucpr, ndcg, map,
    * gamma-deviance
    */
   final val evalMetric = new Param[String](this, "evalMetric", "evaluation metrics for " +
@@ -106,14 +107,14 @@ private[spark] trait LearningTaskParams extends Params {

 private[spark] object LearningTaskParams {
   val supportedObjective = HashSet("reg:linear", "reg:squarederror", "reg:logistic",
-    "binary:logistic", "binary:logitraw", "count:poisson", "multi:softmax", "multi:softprob",
-    "rank:pairwise", "rank:ndcg", "rank:map", "reg:gamma", "reg:tweedie")
+    "reg:squaredlogerror", "binary:logistic", "binary:logitraw", "count:poisson", "multi:softmax",
+    "multi:softprob", "rank:pairwise", "rank:ndcg", "rank:map", "reg:gamma", "reg:tweedie")

   val supportedObjectiveType = HashSet("regression", "classification")

   val evalMetricsToMaximize = HashSet("auc", "aucpr", "ndcg", "map")

-  val evalMetricsToMinimize = HashSet("rmse", "mae", "logloss", "error", "merror",
+  val evalMetricsToMinimize = HashSet("rmse", "rmsle", "mae", "logloss", "error", "merror",
     "mlogloss", "gamma-deviance")

   val supportedEvalMetrics = evalMetricsToMaximize union evalMetricsToMinimize

@@ -153,6 +153,19 @@ struct EvalRowRMSE {
   }
 };

+struct EvalRowRMSLE {
+  char const* Name() const {
+    return "rmsle";
+  }
+  XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float pred) const {
+    bst_float diff = std::log1p(label) - std::log1p(pred);
+    return diff * diff;
+  }
+  static bst_float GetFinal(bst_float esum, bst_float wsum) {
+    return std::sqrt(esum / wsum);
+  }
+};
+
 struct EvalRowMAE {
   const char *Name() const {
     return "mae";
@@ -349,6 +362,10 @@ XGBOOST_REGISTER_METRIC(RMSE, "rmse")
 .describe("Rooted mean square error.")
 .set_body([](const char* param) { return new EvalEWiseBase<EvalRowRMSE>(); });

+XGBOOST_REGISTER_METRIC(RMSLE, "rmsle")
+.describe("Rooted mean square log error.")
+.set_body([](const char* param) { return new EvalEWiseBase<EvalRowRMSLE>(); });
+
 XGBOOST_REGISTER_METRIC(MAE, "mae")
 .describe("Mean absolute error.")
 .set_body([](const char* param) { return new EvalEWiseBase<EvalRowMAE>(); });

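``EvalEWiseBase`` supplies the reduction around the struct above: as far as one can tell from this hunk, it accumulates the weighted per-row errors into ``esum`` and the total weight into ``wsum``, then applies ``GetFinal`` once. The sketch below is a hedged illustration of that contract, not the actual ``EvalEWiseBase`` code; only ``EvalRow`` and ``GetFinal`` come from this diff:

#include <cmath>
#include <cstdio>
#include <vector>

// Mirror of EvalRowRMSLE from the hunk above, using plain float.
struct RowRMSLE {
  static float EvalRow(float label, float pred) {
    float diff = std::log1p(label) - std::log1p(pred);
    return diff * diff;
  }
  static float GetFinal(float esum, float wsum) { return std::sqrt(esum / wsum); }
};

// Assumed shape of the element-wise reduction: weighted sum of per-row
// errors in esum, sum of weights in wsum, then one call to GetFinal.
float Reduce(const std::vector<float>& labels, const std::vector<float>& preds,
             const std::vector<float>& weights) {
  float esum = 0.0f, wsum = 0.0f;
  for (size_t i = 0; i < labels.size(); ++i) {
    float w = weights.empty() ? 1.0f : weights[i];  // missing weights count as 1
    esum += w * RowRMSLE::EvalRow(labels[i], preds[i]);
    wsum += w;
  }
  return RowRMSLE::GetFinal(esum, wsum);
}

int main() {
  // ~0.40633, matching the metric test later in this diff to within 1e-4
  std::printf("%.5f\n",
              Reduce({1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
                     {0.1f, 0.2f, 0.4f, 0.8f, 1.6f}, {}));
}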
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2017 XGBoost contributors
+ * Copyright 2017-2019 XGBoost contributors
  */
 #ifndef XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_
 #define XGBOOST_OBJECTIVE_REGRESSION_LOSS_H_
@@ -36,6 +36,29 @@ struct LinearSquareLoss {
   static const char* DefaultEvalMetric() { return "rmse"; }
 };

+struct SquaredLogError {
+  XGBOOST_DEVICE static bst_float PredTransform(bst_float x) { return x; }
+  XGBOOST_DEVICE static bool CheckLabel(bst_float label) {
+    return label > -1;
+  }
+  XGBOOST_DEVICE static bst_float FirstOrderGradient(bst_float predt, bst_float label) {
+    predt = fmaxf(predt, -1 + 1e-6);  // ensure correct value for log1p
+    return (std::log1p(predt) - std::log1p(label)) / (predt + 1);
+  }
+  XGBOOST_DEVICE static bst_float SecondOrderGradient(bst_float predt, bst_float label) {
+    predt = fmaxf(predt, -1 + 1e-6);
+    float res = (-std::log1p(predt) + std::log1p(label) + 1) /
+                std::pow(predt + 1, 2);
+    res = fmaxf(res, 1e-6f);  // keep the hessian strictly positive for stability
+    return res;
+  }
+  static bst_float ProbToMargin(bst_float base_score) { return base_score; }
+  static const char* LabelErrorMsg() {
+    return "label must be greater than -1 for rmsle so that log(label + 1) can be valid.";
+  }
+  static const char* DefaultEvalMetric() { return "rmsle"; }
+};
+
 // logistic loss for probability regression task
 struct LogisticRegression {
   // duplication is necessary, as __device__ specifier

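The gradients follow directly from L = 1/2 * (log(1+p) - log(1+y))^2: differentiating with respect to the prediction p gives dL/dp = (log(1+p) - log(1+y)) / (1+p), and differentiating once more gives d2L/dp2 = (1 - log(1+p) + log(1+y)) / (1+p)^2, which is exactly what the struct computes. A standalone check (not part of this commit) against the first expected gradient/hessian pair from the objective test later in this diff:

#include <cmath>
#include <cstdio>

// Gradient and hessian of 0.5 * (log1p(p) - log1p(y))^2, mirroring
// SquaredLogError::FirstOrderGradient / SecondOrderGradient above.
double Grad(double p, double y) { return (std::log1p(p) - std::log1p(y)) / (p + 1); }
double Hess(double p, double y) { return (-std::log1p(p) + std::log1p(y) + 1) / ((p + 1) * (p + 1)); }

int main() {
  // Expected by the objective test for pred = 0.1, label = 1.0:
  // gradient -0.5435, hessian 1.3205.
  std::printf("grad = %.4f, hess = %.4f\n", Grad(0.1, 1.0), Hess(0.1, 1.0));
}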
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2015-2018 by Contributors
+ * Copyright 2015-2019 by Contributors
  * \file regression_obj.cu
  * \brief Definition of single-value regression and classification objectives.
  * \author Tianqi Chen, Kailong Chen
@@ -124,6 +124,10 @@ XGBOOST_REGISTER_OBJECTIVE(SquaredLossRegression, "reg:squarederror")
 .describe("Regression with squared error.")
 .set_body([]() { return new RegLossObj<LinearSquareLoss>(); });

+XGBOOST_REGISTER_OBJECTIVE(SquareLogError, "reg:squaredlogerror")
+.describe("Regression with root mean squared logarithmic error.")
+.set_body([]() { return new RegLossObj<SquaredLogError>(); });
+
 XGBOOST_REGISTER_OBJECTIVE(LogisticRegression, "reg:logistic")
 .describe("Logistic regression for probability regression task.")
 .set_body([]() { return new RegLossObj<LogisticRegression>(); });

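This registration block is what makes the new name resolvable at runtime; the objective test later in this diff looks it up with ``ObjFunction::Create("reg:squaredlogerror", ...)``. Below is a toy sketch of the name-to-factory pattern such macros implement; the real registry (in dmlc-core) differs in its details:

#include <cstdio>
#include <functional>
#include <map>
#include <string>

// Toy objective interface and name-keyed factory registry, illustrating
// the pattern behind XGBOOST_REGISTER_OBJECTIVE (details differ upstream).
struct Objective {
  virtual const char* DefaultEvalMetric() const = 0;
  virtual ~Objective() = default;
};
struct SquaredLogErrorObj : Objective {
  const char* DefaultEvalMetric() const override { return "rmsle"; }
};

std::map<std::string, std::function<Objective*()>>& Registry() {
  static std::map<std::string, std::function<Objective*()>> r;
  return r;
}

int main() {
  Registry()["reg:squaredlogerror"] = []() { return new SquaredLogErrorObj(); };
  Objective* obj = Registry().at("reg:squaredlogerror")();  // lookup by name, like Create()
  std::printf("%s\n", obj->DefaultEvalMetric());            // prints rmsle
  delete obj;
}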
@@ -19,6 +19,18 @@ TEST(Metric, DeclareUnifiedTest(RMSE)) {
   delete metric;
 }

+TEST(Metric, DeclareUnifiedTest(RMSLE)) {
+  auto lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
+  xgboost::Metric * metric = xgboost::Metric::Create("rmsle", &lparam);
+  metric->Configure({});
+  ASSERT_STREQ(metric->Name(), "rmsle");
+  EXPECT_NEAR(GetMetricEval(metric, {0, 1}, {0, 1}), 0, 1e-10);
+  EXPECT_NEAR(GetMetricEval(metric,
+                            {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},
+                            {1.0f, 1.0f, 1.0f, 1.0f, 1.0f}), 0.40632, 1e-4);
+  delete metric;
+}
+
 TEST(Metric, DeclareUnifiedTest(MAE)) {
   auto lparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
   xgboost::Metric * metric = xgboost::Metric::Create("mae", &lparam);

@@ -31,6 +31,30 @@ TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
   delete obj;
 }

+TEST(Objective, DeclareUnifiedTest(SquaredLog)) {
+  xgboost::LearnerTrainParam tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
+  std::vector<std::pair<std::string, std::string>> args;
+
+  xgboost::ObjFunction * obj =
+      xgboost::ObjFunction::Create("reg:squaredlogerror", &tparam);
+  obj->Configure(args);
+
+  CheckObjFunction(obj,
+                   {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},  // pred
+                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},  // labels
+                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},  // weights
+                   {-0.5435f, -0.4257f, -0.25475f, -0.05855f, 0.1009f},  // expected gradients
+                   { 1.3205f,  1.0492f,  0.69215f,  0.34115f, 0.1091f});  // expected hessians
+  CheckObjFunction(obj,
+                   {0.1f, 0.2f, 0.4f, 0.8f, 1.6f},  // pred
+                   {1.0f, 1.0f, 1.0f, 1.0f, 1.0f},  // labels
+                   {},                              // empty weights
+                   {-0.5435f, -0.4257f, -0.25475f, -0.05855f, 0.1009f},  // expected gradients
+                   { 1.3205f,  1.0492f,  0.69215f,  0.34115f, 0.1091f});  // expected hessians
+  ASSERT_EQ(obj->DefaultEvalMetric(), std::string{"rmsle"});
+  delete obj;
+}
+
 TEST(Objective, DeclareUnifiedTest(LogisticRegressionGPair)) {
   xgboost::LearnerTrainParam tparam = xgboost::CreateEmptyGenericParam(0, NGPUS);
   std::vector<std::pair<std::string, std::string>> args;
