Re-implement ROC-AUC. (#6747)

* Re-implement ROC-AUC.
* Binary
* MultiClass
* LTR
* Add documents.

This PR resolves a few issues:
- Define a value for when the dataset is invalid, which can happen if the dataset is empty or if it contains only positive or only negative values.
- Define ROC-AUC for multi-class classification.
- Define a weighted average value for the distributed setting.
- Provide a correct implementation for the learning-to-rank task. The previous implementation was just binary classification with averaging across groups, which doesn't measure ordered learning to rank.
2021-03-20 16:52:40 +08:00
parent 4ee8340e79
commit bcc0277338
27 changed files with 1622 additions and 461 deletions
--- a/src/tree/param.h
+++ b/src/tree/param.h
@@ -15,6 +15,7 @@

 #include "xgboost/parameter.h"
 #include "xgboost/data.h"
+#include "../common/math.h"

 namespace xgboost {
 namespace tree {
@@ -264,14 +265,11 @@ XGBOOST_DEVICE inline static T1 ThresholdL1(T1 w, T2 alpha) {
  return 0.0;
 }

-template <typename T>
-XGBOOST_DEVICE inline static T Sqr(T a) { return a * a; }
-
 // calculate the cost of loss function
 template <typename TrainingParams, typename T>
 XGBOOST_DEVICE inline T CalcGainGivenWeight(const TrainingParams &p,
                                            T sum_grad, T sum_hess, T w) {
-  return -(T(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * Sqr(w));
+  return -(T(2.0) * sum_grad * w + (sum_hess + p.reg_lambda) * common::Sqr(w));
 }

 // calculate weight given the statistics
@@ -296,9 +294,9 @@ XGBOOST_DEVICE inline T CalcGain(const TrainingParams &p, T sum_grad, T sum_hess
  }
  if (p.max_delta_step == 0.0f) {
    if (p.reg_alpha == 0.0f) {
-      return Sqr(sum_grad) / (sum_hess + p.reg_lambda);
+      return common::Sqr(sum_grad) / (sum_hess + p.reg_lambda);
    } else {
-      return Sqr(ThresholdL1(sum_grad, p.reg_alpha)) /
+      return common::Sqr(ThresholdL1(sum_grad, p.reg_alpha)) /
          (sum_hess + p.reg_lambda);
    }
  } else {
--- a/src/tree/split_evaluator.h
+++ b/src/tree/split_evaluator.h
@@ -114,7 +114,7 @@ class TreeEvaluator {
      }
      // Avoiding tree::CalcGainGivenWeight can significantly reduce avg floating point error.
      if (p.max_delta_step == 0.0f && has_constraint == false) {
-        return Sqr(ThresholdL1(stats.sum_grad, p.reg_alpha)) /
+        return common::Sqr(ThresholdL1(stats.sum_grad, p.reg_alpha)) /
               (stats.sum_hess + p.reg_lambda);
      }
      return tree::CalcGainGivenWeight<ParamT, float>(p, stats.sum_grad,