From 33577ef5d33d63c50605097e7895fa6012bce585 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Mon, 14 Sep 2020 18:45:27 -0700 Subject: [PATCH] Add MAPE metric (#6119) --- R-package/R/xgb.cv.R | 2 ++ R-package/R/xgb.train.R | 2 ++ R-package/man/xgb.cv.Rd | 2 ++ R-package/man/xgb.train.Rd | 2 ++ doc/gpu/index.rst | 2 ++ doc/parameter.rst | 1 + .../scala/spark/params/LearningTaskParams.scala | 6 +++--- src/metric/elementwise_metric.cu | 16 ++++++++++++++++ tests/cpp/metric/test_elementwise_metric.cc | 11 +++++++++++ 9 files changed, 41 insertions(+), 3 deletions(-) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index fb48ca607..9eea19e6d 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -36,6 +36,8 @@ #' \item \code{error} binary classification error rate #' \item \code{rmse} Rooted mean square error #' \item \code{logloss} negative log-likelihood function +#' \item \code{mae} Mean absolute error +#' \item \code{mape} Mean absolute percentage error #' \item \code{auc} Area under curve #' \item \code{aucpr} Area under PR curve #' \item \code{merror} Exact matching error, used to evaluate multi-class classification diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 0449ae266..11ec03fe0 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -137,6 +137,8 @@ #' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. #' Different threshold (e.g., 0.) could be specified as "error@0." #' \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. +#' \item \code{mae} Mean absolute error +#' \item \code{mape} Mean absolute percentage error #' \item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. #' \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation. #' \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG} diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index 86a88007b..0967813c5 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -70,6 +70,8 @@ from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callb \item \code{error} binary classification error rate \item \code{rmse} Rooted mean square error \item \code{logloss} negative log-likelihood function + \item \code{mae} Mean absolute error + \item \code{mape} Mean absolute percentage error \item \code{auc} Area under curve \item \code{aucpr} Area under PR curve \item \code{merror} Exact matching error, used to evaluate multi-class classification diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index e68962fb6..bd3b6aa98 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -222,6 +222,8 @@ The following is the list of built-in metrics for which Xgboost provides optimiz By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. Different threshold (e.g., 0.) could be specified as "error@0." \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. + \item \code{mae} Mean absolute error + \item \code{mape} Mean absolute percentage error \item \code{auc} Area under the curve. 
\url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation. \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG} diff --git a/doc/gpu/index.rst b/doc/gpu/index.rst index 9a83993fc..d716d9b4c 100644 --- a/doc/gpu/index.rst +++ b/doc/gpu/index.rst @@ -153,6 +153,8 @@ Following table shows current support status for evaluation metrics on the GPU. +------------------------------+-------------+ | mae | |tick| | +------------------------------+-------------+ +| mape | |tick| | ++------------------------------+-------------+ | mphe | |tick| | +------------------------------+-------------+ | logloss | |tick| | diff --git a/doc/parameter.rst b/doc/parameter.rst index 7e7e774a2..58b08e918 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -383,6 +383,7 @@ Specify the learning task and the corresponding learning objective. The objectiv - ``rmse``: `root mean square error `_ - ``rmsle``: root mean square log error: :math:`\sqrt{\frac{1}{N}[log(pred + 1) - log(label + 1)]^2}`. Default metric of ``reg:squaredlogerror`` objective. This metric reduces errors generated by outliers in dataset. But because ``log`` function is employed, ``rmsle`` might output ``nan`` when prediction value is less than -1. See ``reg:squaredlogerror`` for other requirements. - ``mae``: `mean absolute error `_ + - ``mape``: `mean absolute percentage error `_ - ``mphe``: `mean Pseudo Huber error `_. Default metric of ``reg:pseudohubererror`` objective. - ``logloss``: `negative log-likelihood `_ - ``error``: Binary classification error rate. It is calculated as ``#(wrong cases)/#(all cases)``. For the predictions, the evaluation will regard the instances with prediction value larger than 0.5 as positive instances, and the others as negative instances. diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/LearningTaskParams.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/LearningTaskParams.scala index aba8d45f3..988535547 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/LearningTaskParams.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/LearningTaskParams.scala @@ -56,8 +56,8 @@ private[spark] trait LearningTaskParams extends Params { /** * evaluation metrics for validation data, a default metric will be assigned according to * objective(rmse for regression, and error for classification, mean average precision for - * ranking). options: rmse, rmsle, mae, logloss, error, merror, mlogloss, auc, aucpr, ndcg, map, - * gamma-deviance + * ranking). 
options: rmse, rmsle, mae, mape, logloss, error, merror, mlogloss, auc, aucpr, ndcg, + * map, gamma-deviance */ final val evalMetric = new Param[String](this, "evalMetric", "evaluation metrics for " + "validation data, a default metric will be assigned according to objective " + @@ -121,6 +121,6 @@ private[spark] object LearningTaskParams { val evalMetricsToMaximize = HashSet("auc", "aucpr", "ndcg", "map") - val evalMetricsToMinimize = HashSet("rmse", "rmsle", "mae", "logloss", "error", "merror", + val evalMetricsToMinimize = HashSet("rmse", "rmsle", "mae", "mape", "logloss", "error", "merror", "mlogloss", "gamma-deviance") } diff --git a/src/metric/elementwise_metric.cu b/src/metric/elementwise_metric.cu index f57ac54e6..e1e3de6cd 100644 --- a/src/metric/elementwise_metric.cu +++ b/src/metric/elementwise_metric.cu @@ -168,6 +168,18 @@ struct EvalRowMAE { } }; +struct EvalRowMAPE { + const char *Name() const { + return "mape"; + } + XGBOOST_DEVICE bst_float EvalRow(bst_float label, bst_float pred) const { + return std::abs((label - pred) / label); + } + static bst_float GetFinal(bst_float esum, bst_float wsum) { + return wsum == 0 ? esum : esum / wsum; + } +}; + struct EvalRowLogLoss { const char *Name() const { return "logloss"; @@ -369,6 +381,10 @@ XGBOOST_REGISTER_METRIC(MAE, "mae") .describe("Mean absolute error.") .set_body([](const char* param) { return new EvalEWiseBase(); }); +XGBOOST_REGISTER_METRIC(MAPE, "mape") + .describe("Mean absolute percentage error.") + .set_body([](const char* param) { return new EvalEWiseBase(); }); + XGBOOST_REGISTER_METRIC(MPHE, "mphe") .describe("Mean Pseudo Huber error.") .set_body([](const char* param) { return new EvalEWiseBase(); }); diff --git a/tests/cpp/metric/test_elementwise_metric.cc b/tests/cpp/metric/test_elementwise_metric.cc index a173d0a0c..aa1f227ac 100644 --- a/tests/cpp/metric/test_elementwise_metric.cc +++ b/tests/cpp/metric/test_elementwise_metric.cc @@ -44,6 +44,17 @@ TEST(Metric, DeclareUnifiedTest(MAE)) { delete metric; } +TEST(Metric, DeclareUnifiedTest(MAPE)) { + auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX); + xgboost::Metric * metric = xgboost::Metric::Create("mape", &lparam); + metric->Configure({}); + ASSERT_STREQ(metric->Name(), "mape"); + EXPECT_NEAR(GetMetricEval(metric, {150, 300}, {100, 200}), 0.5f, 1e-10); + EXPECT_NEAR(GetMetricEval(metric, {50, 400, 500, 4000}, {100, 200, 500, 1000}), + 1.125f, 0.001f); + delete metric; +} + TEST(Metric, DeclareUnifiedTest(MPHE)) { auto lparam = xgboost::CreateEmptyGenericParam(GPUIDX); xgboost::Metric * metric = xgboost::Metric::Create("mphe", &lparam);
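
Reviewer note: the sketch below is a minimal, standalone restatement of what the added EvalRowMAPE functor computes; it is not the actual EvalEWiseBase machinery (instance weights, GPU dispatch, and distributed aggregation are omitted, and the Mape helper plus its main driver are illustrative names only). Each row contributes |(label - pred) / label|, and GetFinal divides the accumulated error sum by the accumulated weight sum; with unit weights this reduces to a plain mean, which reproduces the expected values in the new DeclareUnifiedTest(MAPE) case.

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Standalone sketch of the per-row error and finalization added in this patch.
// Rows are given unit weight, as in the unit test above.
float Mape(const std::vector<float>& labels, const std::vector<float>& preds) {
  float esum = 0.0f;  // running (weighted) sum of per-row errors
  float wsum = 0.0f;  // running sum of instance weights
  for (std::size_t i = 0; i < labels.size(); ++i) {
    // EvalRowMAPE::EvalRow: |(label - pred) / label|.
    // A zero label makes the row inf/nan, so MAPE presumes nonzero labels.
    esum += std::abs((labels[i] - preds[i]) / labels[i]);
    wsum += 1.0f;  // default instance weight
  }
  // EvalRowMAPE::GetFinal: divide by total weight unless it is zero.
  return wsum == 0.0f ? esum : esum / wsum;
}

int main() {
  // Matches the expectations in tests/cpp/metric/test_elementwise_metric.cc:
  // preds {150, 300} vs labels {100, 200} -> 0.5
  assert(std::abs(Mape({100, 200}, {150, 300}) - 0.5f) < 1e-6f);
  // preds {50, 400, 500, 4000} vs labels {100, 200, 500, 1000} -> 1.125
  assert(std::abs(Mape({100, 200, 500, 1000}, {50, 400, 500, 4000}) - 1.125f) < 1e-3f);
  std::printf("mape sketch checks passed\n");
  return 0;
}

Once the patch is applied, the metric is requested the same way as mae, e.g. eval_metric=mape (evalMetric in the Spark params), and on the Spark side it is treated as a metric to minimize per the evalMetricsToMinimize change above.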