Re-implement ROC-AUC. (#6747)
* Re-implement ROC-AUC. * Binary * MultiClass * LTR * Add documents. This PR resolves a few issues: - Define a value for when the dataset is invalid, which can happen if the dataset is empty, or when it contains only positive or only negative samples. - Define ROC-AUC for multi-class classification. - Define a weighted average value for the distributed setting. - A correct implementation for the learning-to-rank task. The previous implementation was just binary classification AUC averaged across groups, which does not measure ranking quality.
This commit is contained in:
@@ -123,3 +123,90 @@ class TestEvalMetrics:
|
||||
gamma_dev = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1].split(":")[0])
|
||||
skl_gamma_dev = mean_gamma_deviance(y, score)
|
||||
np.testing.assert_allclose(gamma_dev, skl_gamma_dev, rtol=1e-6)
|
||||
|
||||
def run_roc_auc_binary(self, tree_method, n_samples):
    """Compare xgboost's binary ROC-AUC metric against scikit-learn's.

    Trains a binary classifier with ``eval_metric="auc"`` and checks that
    the value reported by ``Booster.eval`` matches
    ``sklearn.metrics.roc_auc_score`` on the training data and on random
    (uncorrelated) features.
    """
    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.metrics import roc_auc_score

    rng = np.random.RandomState(1994)
    n_features = 10

    X, y = make_classification(
        n_samples,
        n_features,
        n_informative=n_features,
        n_redundant=0,
        random_state=rng,
    )
    Xy = xgb.DMatrix(X, y)
    booster = xgb.train(
        {
            "tree_method": tree_method,
            "eval_metric": "auc",
            "objective": "binary:logistic",
        },
        Xy,
        num_boost_round=8,
    )
    score = booster.predict(Xy)
    skl_auc = roc_auc_score(y, score)
    # `eval` returns a string like "[0]\teval-auc:<value>"; take the value.
    auc = float(booster.eval(Xy).split(":")[1])
    np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)

    # Random features: predictions should be uncorrelated with the labels,
    # but both implementations must still agree on the AUC value.
    X = rng.randn(*X.shape)
    score = booster.predict(xgb.DMatrix(X))
    skl_auc = roc_auc_score(y, score)
    auc = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1])
    np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
||||
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
def test_roc_auc(self, n_samples):
    """Binary ROC-AUC matches scikit-learn across several dataset sizes."""
    self.run_roc_auc_binary("hist", n_samples)
||||
def run_roc_auc_multi(self, tree_method, n_samples):
    """Compare xgboost's multi-class ROC-AUC metric against scikit-learn's.

    xgboost reports the weighted one-vs-rest average, so the reference is
    ``roc_auc_score(..., average="weighted", multi_class="ovr")``.
    """
    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.metrics import roc_auc_score

    rng = np.random.RandomState(1994)
    n_features = 10
    n_classes = 4

    X, y = make_classification(
        n_samples,
        n_features,
        n_informative=n_features,
        n_redundant=0,
        n_classes=n_classes,
        random_state=rng,
    )

    Xy = xgb.DMatrix(X, y)
    booster = xgb.train(
        {
            "tree_method": tree_method,
            "eval_metric": "auc",
            "objective": "multi:softprob",
            "num_class": n_classes,
        },
        Xy,
        num_boost_round=8,
    )
    score = booster.predict(Xy)
    skl_auc = roc_auc_score(y, score, average="weighted", multi_class="ovr")
    # `eval` returns a string like "[0]\teval-auc:<value>"; take the value.
    auc = float(booster.eval(Xy).split(":")[1])
    np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)

    # Labels paired with random features: predictions are uncorrelated with
    # the labels, but both implementations must still agree on the value.
    X = rng.randn(*X.shape)
    score = booster.predict(xgb.DMatrix(X))
    skl_auc = roc_auc_score(y, score, average="weighted", multi_class="ovr")
    auc = float(booster.eval(xgb.DMatrix(X, y)).split(":")[1])
    np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
||||
# The helper imports scikit-learn, so skip when it is unavailable — same
# guard as `test_roc_auc` above.
@pytest.mark.skipif(**tm.no_sklearn())
@pytest.mark.parametrize("n_samples", [4, 100, 1000])
def test_roc_auc_multi(self, n_samples):
    """Multi-class ROC-AUC matches scikit-learn across several sizes."""
    self.run_roc_auc_multi("hist", n_samples)
||||
@@ -9,6 +9,7 @@ import scipy
|
||||
import json
|
||||
from typing import List, Tuple, Dict, Optional, Type, Any
|
||||
import asyncio
|
||||
from functools import partial
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import tempfile
|
||||
from sklearn.datasets import make_classification
|
||||
@@ -528,9 +529,106 @@ def run_empty_dmatrix_cls(client: "Client", parameters: dict) -> None:
|
||||
_check_outputs(out, predictions)
|
||||
|
||||
|
||||
def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) -> None:
    """Check that distributed AUC survives workers that receive no data.

    The validation set holds only ``n_workers - 1`` rows in a single chunk,
    so at least one worker is guaranteed to see an empty validation
    DMatrix; ``fit`` must still complete for both binary and multi-class
    objectives.
    """
    from sklearn import datasets

    n_samples = 100
    n_features = 97
    rng = np.random.RandomState(1994)

    make_classification = partial(
        datasets.make_classification,
        n_features=n_features,
        random_state=rng,
    )

    # binary
    X_, y_ = make_classification(n_samples=n_samples, random_state=rng)
    X = dd.from_array(X_, chunksize=10)
    y = dd.from_array(y_, chunksize=10)

    # Fewer validation rows than workers -> at least one empty partition.
    n_samples = n_workers - 1
    valid_X_, valid_y_ = make_classification(n_samples=n_samples, random_state=rng)
    valid_X = dd.from_array(valid_X_, chunksize=n_samples)
    valid_y = dd.from_array(valid_y_, chunksize=n_samples)

    cls = xgb.dask.DaskXGBClassifier(
        tree_method=tree_method, n_estimators=2, use_label_encoder=False
    )
    cls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])

    # multiclass
    # NOTE(review): `n_samples` is still `n_workers - 1` at this point, so
    # the multi-class *training* set is tiny as well — confirm this is
    # intended and not a leftover from the binary section above.
    X_, y_ = make_classification(
        n_samples=n_samples,
        n_classes=10,
        n_informative=n_features,
        n_redundant=0,
        n_repeated=0,
    )
    X = dd.from_array(X_, chunksize=10)
    y = dd.from_array(y_, chunksize=10)

    valid_X_, valid_y_ = make_classification(
        n_samples=n_samples,
        n_classes=10,
        n_informative=n_features,
        n_redundant=0,
        n_repeated=0,
    )
    valid_X = dd.from_array(valid_X_, chunksize=n_samples)
    valid_y = dd.from_array(valid_y_, chunksize=n_samples)

    cls = xgb.dask.DaskXGBClassifier(
        tree_method=tree_method, n_estimators=2, use_label_encoder=False
    )
    cls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])
||||
def test_empty_dmatrix_auc() -> None:
    """Run the empty-DMatrix AUC check on a two-worker local cluster."""
    n_workers = 2
    with LocalCluster(n_workers=n_workers) as cluster:
        with Client(cluster) as client:
            run_empty_dmatrix_auc(client, "hist", n_workers)
||||
def run_auc(client: "Client", tree_method: str) -> None:
    """Distributed AUC should approximate the single-node computation.

    Fits the same model once with the in-memory ``XGBClassifier`` and once
    with ``DaskXGBClassifier`` and compares the per-round validation AUC.
    """
    from sklearn import datasets

    n_samples = 100
    n_features = 97
    rng = np.random.RandomState(1994)

    # Generate training and validation data (order of the two calls matters
    # for reproducibility: both draw from the same `rng`).
    X_, y_ = datasets.make_classification(
        n_samples=n_samples, n_features=n_features, random_state=rng
    )
    X = dd.from_array(X_, chunksize=10)
    y = dd.from_array(y_, chunksize=10)

    valid_X_, valid_y_ = datasets.make_classification(
        n_samples=n_samples, n_features=n_features, random_state=rng
    )
    valid_X = dd.from_array(valid_X_, chunksize=10)
    valid_y = dd.from_array(valid_y_, chunksize=10)

    # Single-node reference on the raw numpy arrays.
    cls = xgb.XGBClassifier(
        tree_method=tree_method, n_estimators=2, use_label_encoder=False
    )
    cls.fit(X_, y_, eval_metric="auc", eval_set=[(valid_X_, valid_y_)])

    # Distributed run on the dask collections.
    dcls = xgb.dask.DaskXGBClassifier(
        tree_method=tree_method, n_estimators=2, use_label_encoder=False
    )
    dcls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])

    approx = dcls.evals_result()["validation_0"]["auc"]
    exact = cls.evals_result()["validation_0"]["auc"]
    # The distributed metric is an approximation; allow a small absolute gap.
    for distributed, single in zip(approx, exact):
        assert np.abs(distributed - single) <= 0.06
||||
def test_auc(client: "Client") -> None:
    """Run the single-node vs. distributed AUC comparison with `hist`."""
    run_auc(client, "hist")
||||
# No test for Exact, as empty DMatrix handling are mostly for distributed
|
||||
# environment and Exact doesn't support it.
|
||||
|
||||
def test_empty_dmatrix_hist() -> None:
|
||||
with LocalCluster(n_workers=kWorkers) as cluster:
|
||||
with Client(cluster) as client:
|
||||
|
||||
Reference in New Issue
Block a user