Re-implement ROC-AUC. (#6747)

* Re-implement ROC-AUC. * Binary * MultiClass * LTR * Add documentation. This PR resolves a few issues: - Define a value for when the dataset is invalid, which can happen if the dataset is empty or contains only positive or only negative values. - Define ROC-AUC for multi-class classification. - Define a weighted average value for the distributed setting. - Provide a correct implementation for the learning-to-rank task. The previous implementation was just binary classification averaged across groups, which doesn't measure ordered learning to rank.
2021-03-20 16:52:40 +08:00
parent 4ee8340e79
commit bcc0277338
27 changed files with 1622 additions and 461 deletions
--- a/tests/python-gpu/test_gpu_eval_metrics.py
+++ b/tests/python-gpu/test_gpu_eval_metrics.py
@@ -0,0 +1,47 @@
+import sys
+import xgboost
+import pytest
+
+sys.path.append("tests/python")
+import test_eval_metrics as test_em  # noqa
+
+
+class TestGPUEvalMetrics:
+    """Run the ROC-AUC metric tests with the ``gpu_hist`` tree method."""
+
+    # Instance of the tests/python suite; its run_* helpers are called with "gpu_hist".
+    cpu_test = test_em.TestEvalMetrics()
+
+    @pytest.mark.parametrize("n_samples", [4, 100, 1000])
+    def test_roc_auc_binary(self, n_samples):
+        """Delegate the binary AUC check to the shared suite using ``gpu_hist``."""
+        self.cpu_test.run_roc_auc_binary("gpu_hist", n_samples)
+
+    @pytest.mark.parametrize("n_samples", [4, 100, 1000])
+    def test_roc_auc_multi(self, n_samples):
+        """Delegate the multi-class AUC check to the shared suite using ``gpu_hist``."""
+        self.cpu_test.run_roc_auc_multi("gpu_hist", n_samples)
+
+    @pytest.mark.parametrize("n_samples", [4, 100, 1000])
+    def test_roc_auc_ltr(self, n_samples):
+        """Check that ranking AUC agrees between ``hist`` and ``gpu_hist`` models."""
+        import numpy as np
+
+        rng = np.random.RandomState(1994)
+        n_features = 10
+        X = rng.randn(n_samples, n_features)
+        y = rng.randint(0, 16, size=n_samples)
+        # Two query groups whose sizes always sum to n_samples, even when it is odd.
+        group = np.array([n_samples // 2, n_samples - n_samples // 2])
+        Xy = xgboost.DMatrix(X, y, group=group)
+
+        aucs = {}
+        for tree_method in ("hist", "gpu_hist"):
+            booster = xgboost.train(
+                {"tree_method": tree_method, "eval_metric": "auc", "objective": "rank:ndcg"},
+                Xy,
+                num_boost_round=10,
+            )
+            aucs[tree_method] = float(booster.eval(Xy).split(":")[1])
+
+        np.testing.assert_allclose(aucs["hist"], aucs["gpu_hist"])