Re-implement ROC-AUC. (#6747)

* Re-implement ROC-AUC.

* Binary
* MultiClass
* LTR
* Add documents.

This PR resolves a few issues:
  - Define a value when the dataset is invalid, which can happen when the
  dataset is empty, or when it contains only positive or only negative values.
  - Define ROC-AUC for multi-class classification.
  - Define a weighted average value for the distributed setting.
  - A correct implementation for the learning-to-rank task.  The previous
  implementation was just binary classification averaged across groups,
  which doesn't measure the ordering that learning to rank is about.
This commit is contained in:
Jiaming Yuan
2021-03-20 16:52:40 +08:00
committed by GitHub
parent 4ee8340e79
commit bcc0277338
27 changed files with 1622 additions and 461 deletions

View File

@@ -5,6 +5,10 @@ import itertools
import shutil
import urllib.request
import zipfile
import sys
sys.path.append("tests/python")
import testing as tm # noqa
class TestRanking:
@@ -15,9 +19,9 @@ class TestRanking:
"""
from sklearn.datasets import load_svmlight_files
# download the test data
cls.dpath = 'demo/rank/'
cls.dpath = os.path.join(tm.PROJECT_ROOT, "demo/rank/")
src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
target = cls.dpath + '/MQ2008.zip'
target = os.path.join(cls.dpath, "MQ2008.zip")
if os.path.exists(cls.dpath) and os.path.exists(target):
print("Skipping dataset download...")
@@ -79,8 +83,8 @@ class TestRanking:
Cleanup test artifacts from download and unpacking
:return:
"""
os.remove(cls.dpath + "MQ2008.zip")
shutil.rmtree(cls.dpath + "MQ2008")
os.remove(os.path.join(cls.dpath, "MQ2008.zip"))
shutil.rmtree(os.path.join(cls.dpath, "MQ2008"))
@classmethod
def __test_training_with_rank_objective(cls, rank_objective, metric_name, tolerance=1e-02):