From 9252b686ae875997bac21ca411c3da15fd902732 Mon Sep 17 00:00:00 2001
From: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
Date: Fri, 3 May 2019 10:34:44 -0700
Subject: [PATCH] Make AUCPR work with multiple query groups (#4436)

* Make AUCPR work with multiple query groups

* Check AUCPR <= 1.0 in distributed setting
---
 src/metric/rank_metric.cc    | 38 +++++++++++++++++-------------------
 tests/python/test_ranking.py | 27 +++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 20 deletions(-)
 create mode 100644 tests/python/test_ranking.py

diff --git a/src/metric/rank_metric.cc b/src/metric/rank_metric.cc
index 43a5a2333..0f3d77936 100644
--- a/src/metric/rank_metric.cc
+++ b/src/metric/rank_metric.cc
@@ -101,11 +101,11 @@ struct EvalAuc : public Metric {
     CHECK_EQ(gptr.back(), info.labels_.Size())
         << "EvalAuc: group structure must match number of prediction";
     const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
-    // sum statistics
-    bst_float sum_auc = 0.0f;
+    // sum of the per-group AUC values across all query groups
+    double sum_auc = 0.0;
     int auc_error = 0;
     // each thread takes a local rec
-    std::vector< std::pair<bst_float, unsigned> > rec;
+    std::vector<std::pair<bst_float, unsigned>> rec;
     const auto& labels = info.labels_.HostVector();
     const std::vector<bst_float>& h_preds = preds.HostVector();
     for (bst_omp_uint k = 0; k < ngroup; ++k) {
@@ -130,7 +130,7 @@ struct EvalAuc : public Metric {
         buf_pos += ctr * wt;
         buf_neg += (1.0f - ctr) * wt;
       }
-      sum_pospair += buf_neg * (sum_npos + buf_pos *0.5);
+      sum_pospair += buf_neg * (sum_npos + buf_pos * 0.5);
       sum_npos += buf_pos;
       sum_nneg += buf_neg;
       // check weird conditions
@@ -139,15 +139,15 @@ struct EvalAuc : public Metric {
         continue;
       }
       // this is the AUC
-      sum_auc += sum_pospair / (sum_npos*sum_nneg);
+      sum_auc += sum_pospair / (sum_npos * sum_nneg);
     }
     CHECK(!auc_error)
       << "AUC: the dataset only contains pos or neg samples";
+    /* Report average AUC across all groups */
     if (distributed) {
       bst_float dat[2];
       dat[0] = static_cast<bst_float>(sum_auc);
       dat[1] = static_cast<bst_float>(ngroup);
-      // approximately estimate auc using mean
       rabit::Allreduce<rabit::op::Sum>(dat, 2);
       return dat[0] / dat[1];
     } else {
@@ -383,9 +383,9 @@ struct EvalAucPR : public Metric {
     CHECK_EQ(gptr.back(), info.labels_.Size())
         << "EvalAucPR: group structure must match number of prediction";
     const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
-    // sum statistics
-    double auc = 0.0;
-    int auc_error = 0, auc_gt_one = 0;
+    // sum of the per-group AUC-PR values across all query groups
+    double sum_auc = 0.0;
+    int auc_error = 0;
     // each thread takes a local rec
     std::vector<std::pair<bst_float, unsigned>> rec;
     const auto& h_labels = info.labels_.HostVector();
@@ -420,14 +420,11 @@ struct EvalAucPR : public Metric {
             b = (prevfp - h * prevtp) / total_pos;
           }
           if (0.0 != b) {
-            auc += (tp / total_pos - prevtp / total_pos -
-                    b / a * (std::log(a * tp / total_pos + b) -
-                             std::log(a * prevtp / total_pos + b))) / a;
+            sum_auc += (tp / total_pos - prevtp / total_pos -
+                        b / a * (std::log(a * tp / total_pos + b) -
+                                 std::log(a * prevtp / total_pos + b))) / a;
           } else {
-            auc += (tp / total_pos - prevtp / total_pos) / a;
-          }
-          if (auc > 1.0) {
-            auc_gt_one = 1;
+            sum_auc += (tp / total_pos - prevtp / total_pos) / a;
           }
           prevtp = tp;
           prevfp = fp;
@@ -439,16 +436,17 @@ struct EvalAucPR : public Metric {
       }
     }
     CHECK(!auc_error) << "AUC-PR: the dataset only contains pos or neg samples";
-    CHECK(!auc_gt_one) << "AUC-PR: AUC > 1.0";
+    /* Report average AUC across all groups */
     if (distributed) {
       bst_float dat[2];
-      dat[0] = static_cast<bst_float>(auc);
+      dat[0] = static_cast<bst_float>(sum_auc);
       dat[1] = static_cast<bst_float>(ngroup);
-      // approximately estimate auc using mean
       rabit::Allreduce<rabit::op::Sum>(dat, 2);
+      CHECK_LE(dat[0], dat[1]) << "AUC-PR: AUC > 1.0";
       return dat[0] / dat[1];
     } else {
-      return static_cast<bst_float>(auc) / ngroup;
+      CHECK_LE(sum_auc, static_cast<double>(ngroup)) << "AUC-PR: AUC > 1.0";
+      return static_cast<bst_float>(sum_auc) / ngroup;
     }
   }
   const char *Name() const override { return "aucpr"; }
diff --git a/tests/python/test_ranking.py b/tests/python/test_ranking.py
new file mode 100644
index 000000000..50c1dbfbc
--- /dev/null
+++ b/tests/python/test_ranking.py
@@ -0,0 +1,27 @@
+import numpy as np
+from scipy.sparse import csr_matrix
+import xgboost
+
+def test_ranking_with_unweighted_data():  # 'auc' and 'aucpr' on grouped data must never decrease between rounds
+    Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])  # row indices of the stored entries
+    Xcol = np.array([0, 0, 1, 1,  2,  2,  3,  3])  # column indices of the stored entries
+    X = csr_matrix((np.ones(shape=8), (Xrow, Xcol)), shape=(20, 4))  # 20x4 sparse matrix, eight entries of 1.0
+    y = np.array([0.0, 1.0, 1.0, 0.0, 0.0,
+                  0.0, 1.0, 0.0, 1.0, 0.0,
+                  0.0, 1.0, 0.0, 0.0, 1.0,
+                  0.0, 1.0, 1.0, 0.0, 0.0])
+    # Four query groups of five rows each (20 rows in total); no instance weights are set.
+    group = np.array([5, 5, 5, 5], dtype=np.uint)
+    dtrain = xgboost.DMatrix(X, label=y)
+    dtrain.set_group(group)
+    # Train depth-1 trees with the pairwise ranking objective, evaluating both metrics on the training set.
+    params = {'eta': 1, 'tree_method': 'exact',
+              'objective': 'rank:pairwise', 'eval_metric': ['auc', 'aucpr'],
+              'max_depth': 1}
+    evals_result = {}  # filled in by xgboost.train with the per-round metric values
+    bst = xgboost.train(params, dtrain, 10, evals=[(dtrain, 'train')],
+                        evals_result=evals_result)
+    auc_rec = evals_result['train']['auc']
+    assert all(p <= q for p, q in zip(auc_rec, auc_rec[1:]))  # each AUC value >= the previous one
+    auc_rec = evals_result['train']['aucpr']
+    assert all(p <= q for p, q in zip(auc_rec, auc_rec[1:]))  # each AUC-PR value >= the previous one