Re-implement PR-AUC. (#7297)
* Support binary/multi-class classification and ranking.
* Add documentation.
* Handle missing data.
This commit is contained in:
@@ -48,7 +48,7 @@ TEST(Metric, DeclareUnifiedTest(BinaryAUC)) {
|
||||
0.5, 1e-10);
|
||||
}
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(MultiAUC)) {
|
||||
TEST(Metric, DeclareUnifiedTest(MultiClassAUC)) {
|
||||
auto tparam = CreateEmptyGenericParam(GPUIDX);
|
||||
std::unique_ptr<Metric> uni_ptr{
|
||||
Metric::Create("auc", &tparam)};
|
||||
@@ -64,6 +64,17 @@ TEST(Metric, DeclareUnifiedTest(MultiAUC)) {
|
||||
},
|
||||
{0, 1, 2}),
|
||||
1.0f, 1e-10);
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{
|
||||
1.0f, 0.0f, 0.0f, // p_0
|
||||
0.0f, 1.0f, 0.0f, // p_1
|
||||
0.0f, 0.0f, 1.0f // p_2
|
||||
},
|
||||
{0, 1, 2},
|
||||
{1.0f, 1.0f, 1.0f}),
|
||||
1.0f, 1e-10);
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{
|
||||
1.0f, 0.0f, 0.0f, // p_0
|
||||
@@ -72,6 +83,7 @@ TEST(Metric, DeclareUnifiedTest(MultiAUC)) {
|
||||
},
|
||||
{2, 1, 0}),
|
||||
0.5f, 1e-10);
|
||||
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{
|
||||
1.0f, 0.0f, 0.0f, // p_0
|
||||
@@ -139,5 +151,110 @@ TEST(Metric, DeclareUnifiedTest(RankingAUC)) {
|
||||
/*weights=*/{}, groups),
|
||||
0.769841f, 1e-6);
|
||||
}
|
||||
|
||||
// Checks the "aucpr" metric on binary inputs: plain predictions, instance
// weights, an empty label vector, and grouped data where no group mixes
// positive and negative samples.
TEST(Metric, DeclareUnifiedTest(PRAUC)) {
  auto tparam = xgboost::CreateEmptyGenericParam(GPUIDX);

  // Hold the metric in a unique_ptr: a fatal ASSERT_* returns from the test
  // body immediately, which would skip a trailing `delete` and leak the metric.
  std::unique_ptr<xgboost::Metric> metric{xgboost::Metric::Create("aucpr", &tparam)};
  ASSERT_STREQ(metric->Name(), "aucpr");

  // Predictions identical to the labels.
  EXPECT_NEAR(GetMetricEval(metric.get(), {0, 0, 1, 1}, {0, 0, 1, 1}), 1, 1e-10);
  EXPECT_NEAR(GetMetricEval(metric.get(), {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}),
              0.5f, 0.001f);
  EXPECT_NEAR(GetMetricEval(
                  metric.get(),
                  {0.4f, 0.2f, 0.9f, 0.1f, 0.2f, 0.4f, 0.1f, 0.1f, 0.2f, 0.1f},
                  {0, 0, 0, 0, 0, 1, 0, 0, 1, 1}),
              0.2908445f, 0.001f);
  EXPECT_NEAR(GetMetricEval(
                  metric.get(), {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f,
                                 0.09f, 0.10f, 0.97f, 0.76f, 0.69f, 0.15f, 0.20f,
                                 0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
                  {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}),
              0.2769199f, 0.001f);

  // No labels at all: the metric is expected to report NaN.
  auto auc = GetMetricEval(metric.get(), {0, 1}, {});
  ASSERT_TRUE(std::isnan(auc));

  // AUCPR with instance weights
  EXPECT_NEAR(GetMetricEval(metric.get(),
                            {0.29f, 0.52f, 0.11f, 0.21f, 0.219f, 0.93f, 0.493f,
                             0.17f, 0.47f, 0.13f, 0.43f, 0.59f, 0.87f, 0.007f},
                            {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0},
                            {1, 2, 7, 4, 5, 2.2f, 3.2f, 5, 6, 1, 2, 1.1f, 3.2f,
                             4.5f}),  // weights
              0.694435f, 0.001f);

  // Both groups contain only pos or neg samples.
  auc = GetMetricEval(metric.get(),
                      {0, 0.1f, 0.3f, 0.5f, 0.7f},
                      {1, 1, 0, 0, 0},
                      {},
                      {0, 2, 5});
  ASSERT_TRUE(std::isnan(auc));
}
|
||||
|
||||
// Multi-class "aucpr": one prediction row of three class scores per sample.
TEST(Metric, DeclareUnifiedTest(MultiClassPRAUC)) {
  auto param = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<Metric> aucpr{Metric::Create("aucpr", &param)};

  std::vector<float> truth{1.0f, 0.0f, 2.0f};
  HostDeviceVector<float> scores{
      0.0f, 1.0f, 0.0f,
      1.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f,
  };

  // Every row puts all of its score on the labelled class; no weights.
  float result = GetMetricEval(aucpr.get(), scores, truth, {});
  EXPECT_EQ(result, 1.0f);

  // Same input with uniform weights.
  result = GetMetricEval(aucpr.get(), scores, truth, {1.0f, 1.0f, 1.0f});
  EXPECT_EQ(result, 1.0f);

  // Append a fourth row labelled class 1 whose score sits on class 2,
  // and use non-uniform weights.
  scores.HostVector() = {
      0.0f, 1.0f, 0.0f,
      1.0f, 0.0f, 0.0f,
      0.0f, 0.0f, 1.0f,
      0.0f, 0.0f, 1.0f,
  };
  truth = {1.0f, 0.0f, 2.0f, 1.0f};
  result = GetMetricEval(aucpr.get(), scores, truth, {1.0f, 2.0f, 3.0f, 4.0f});
  ASSERT_GT(result, 0.699);
}
|
||||
|
||||
// Grouped (ranking) "aucpr": the final GetMetricEval argument carries the
// group boundaries.
TEST(Metric, DeclareUnifiedTest(RankingPRAUC)) {
  auto param = xgboost::CreateEmptyGenericParam(GPUIDX);
  std::unique_ptr<Metric> aucpr{Metric::Create("aucpr", &param)};

  std::vector<float> truth{1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f};
  std::vector<uint32_t> group_ptr{0, 2, 6};

  // Predictions equal to the labels.
  float result = GetMetricEval(aucpr.get(), {1.0f, 0.0f, 1.0f, 0.0f, 0.0f, 1.0f},
                               truth, {}, group_ptr);
  EXPECT_EQ(result, 1.0f);

  // Different scores, same expected value.
  result = GetMetricEval(aucpr.get(), {1.0f, 0.5f, 0.8f, 0.3f, 0.2f, 1.0f},
                         truth, {}, group_ptr);
  EXPECT_EQ(result, 1.0f);

  // First group holds only 1-labels, second group only 0-labels: NaN expected.
  result = GetMetricEval(aucpr.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},
                         {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f}, {}, group_ptr);
  ASSERT_TRUE(std::isnan(result));

  // Incorrect label
  ASSERT_THROW(GetMetricEval(aucpr.get(), {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f},
                             {1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 3.0f}, {}, group_ptr),
               dmlc::Error);

  // AUCPR with groups and no weights
  EXPECT_NEAR(GetMetricEval(
                  aucpr.get(), {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f,
                                0.09f, 0.10f, 0.97f, 0.76f, 0.69f, 0.15f, 0.20f,
                                0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
                  {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1},
                  {},                     // weights
                  {0, 2, 5, 9, 14, 20}),  // group info
              0.556021f, 0.001f);
}
|
||||
} // namespace metric
|
||||
} // namespace xgboost
|
||||
|
||||
@@ -24,66 +24,6 @@ TEST(Metric, AMS) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// Legacy "aucpr" test: plain, weighted and grouped inputs, plus the inputs
// that are expected to throw.
TEST(Metric, DeclareUnifiedTest(AUCPR)) {
  auto param = xgboost::CreateEmptyGenericParam(GPUIDX);
  // Raw owning pointer; released by the `delete` at the end of the test.
  xgboost::Metric *aucpr = xgboost::Metric::Create("aucpr", &param);
  ASSERT_STREQ(aucpr->Name(), "aucpr");

  // Unweighted, ungrouped inputs.
  EXPECT_NEAR(GetMetricEval(aucpr, {0, 0, 1, 1}, {0, 0, 1, 1}), 1, 1e-10);
  EXPECT_NEAR(GetMetricEval(aucpr, {0.1f, 0.9f, 0.1f, 0.9f}, {0, 0, 1, 1}),
              0.5f, 0.001f);
  EXPECT_NEAR(
      GetMetricEval(aucpr,
                    {0.4f, 0.2f, 0.9f, 0.1f, 0.2f, 0.4f, 0.1f, 0.1f, 0.2f, 0.1f},
                    {0, 0, 0, 0, 0, 1, 0, 0, 1, 1}),
      0.2908445f, 0.001f);
  EXPECT_NEAR(GetMetricEval(
                  aucpr, {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f,
                          0.09f, 0.10f, 0.97f, 0.76f, 0.69f, 0.15f, 0.20f,
                          0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
                  {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}),
              0.2769199f, 0.001f);

  // Empty labels, all-zero labels and all-one labels are each expected to throw.
  EXPECT_ANY_THROW(GetMetricEval(aucpr, {0, 1}, {}));
  EXPECT_ANY_THROW(GetMetricEval(aucpr, {0, 0}, {0, 0}));
  EXPECT_ANY_THROW(GetMetricEval(aucpr, {0, 0}, {1, 1}));

  // AUCPR with instance weights
  EXPECT_NEAR(GetMetricEval(
                  aucpr, {0.29f, 0.52f, 0.11f, 0.21f, 0.219f, 0.93f, 0.493f,
                          0.17f, 0.47f, 0.13f, 0.43f, 0.59f, 0.87f, 0.007f},
                  {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0},
                  {1, 2, 7, 4, 5, 2.2f, 3.2f, 5, 6, 1, 2, 1.1f, 3.2f, 4.5f}),  // weights
              0.694435f, 0.001f);

  // AUCPR with groups and no weights
  EXPECT_NEAR(GetMetricEval(
                  aucpr, {0.87f, 0.31f, 0.40f, 0.42f, 0.25f, 0.66f, 0.95f,
                          0.09f, 0.10f, 0.97f, 0.76f, 0.69f, 0.15f, 0.20f,
                          0.30f, 0.14f, 0.07f, 0.58f, 0.61f, 0.08f},
                  {0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1},
                  {},                     // weights
                  {0, 2, 5, 9, 14, 20}),  // group info
              0.556021f, 0.001f);

  // AUCPR with groups and weights
  EXPECT_NEAR(GetMetricEval(
                  aucpr, {0.29f, 0.52f, 0.11f, 0.21f, 0.219f, 0.93f, 0.493f,
                          0.17f, 0.47f, 0.13f, 0.43f, 0.59f, 0.87f, 0.007f},  // predictions
                  {0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0},
                  {1, 2, 7, 4, 5, 2.2f, 3.2f, 5, 6, 1, 2, 1.1f, 3.2f, 4.5f},  // weights
                  {0, 2, 5, 9, 14}),                                          // group info
              0.8150615f, 0.001f);

  // Exception scenarios for grouped datasets
  EXPECT_ANY_THROW(GetMetricEval(aucpr,
                                 {0, 0.1f, 0.3f, 0.5f, 0.7f},
                                 {1, 1, 0, 0, 0},
                                 {},
                                 {0, 2, 5}));

  delete aucpr;
}
|
||||
|
||||
|
||||
TEST(Metric, DeclareUnifiedTest(Precision)) {
|
||||
// When the limit for precision is not given, it takes the limit at
|
||||
// std::numeric_limits<unsigned>::max(); hence all values are very small
|
||||
|
||||
@@ -47,3 +47,12 @@ class TestGPUEvalMetrics:
|
||||
gpu_auc = float(gpu.eval(Xy).split(":")[1])
|
||||
|
||||
np.testing.assert_allclose(cpu_auc, gpu_auc)
|
||||
|
||||
def test_pr_auc_binary(self):
|
||||
self.cpu_test.run_pr_auc_binary("gpu_hist")
|
||||
|
||||
def test_pr_auc_multi(self):
|
||||
self.cpu_test.run_pr_auc_multi("gpu_hist")
|
||||
|
||||
def test_pr_auc_ltr(self):
|
||||
self.cpu_test.run_pr_auc_ltr("gpu_hist")
|
||||
|
||||
@@ -239,6 +239,7 @@ class TestEvalMetrics:
|
||||
np.testing.assert_allclose(skl_auc, auc, rtol=1e-6)
|
||||
|
||||
X = rng.randn(*X.shape)
|
||||
|
||||
score = booster.predict(xgb.DMatrix(X, weight=weights))
|
||||
skl_auc = roc_auc_score(
|
||||
y, score, average="weighted", sample_weight=weights, multi_class="ovr"
|
||||
@@ -251,3 +252,63 @@ class TestEvalMetrics:
|
||||
)
|
||||
def test_roc_auc_multi(self, n_samples, weighted):
|
||||
self.run_roc_auc_multi("hist", n_samples, weighted)
|
||||
|
||||
def run_pr_auc_binary(self, tree_method):
    """Check the binary ``aucpr`` eval metric for the given ``tree_method``.

    A single-round classifier's reported ``aucpr`` is compared with the
    area under scikit-learn's precision-recall curve; a 10-round
    classifier is then expected to report roughly 0.99.
    """
    from sklearn.datasets import make_classification
    from sklearn.metrics import auc, precision_recall_curve

    X, y = make_classification(128, 4, n_classes=2, random_state=1994)

    def fit_and_report(n_estimators):
        # Train on (X, y), evaluate on the same data, and return the model
        # together with the last recorded aucpr value.
        model = xgb.XGBClassifier(tree_method=tree_method, n_estimators=n_estimators)
        model.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
        return model, model.evals_result()["validation_0"]["aucpr"][-1]

    single_round, reported = fit_and_report(1)

    positive_scores = single_round.predict_proba(X)[:, 1]  # positive-class column
    precision, recall, _ = precision_recall_curve(y, positive_scores)
    reference = auc(recall, precision)
    # XGBoost interpolates the curve slightly differently from sklearn, so
    # only approximate agreement is required.
    np.testing.assert_allclose(reference, reported, rtol=1e-2)

    _, reported = fit_and_report(10)
    np.testing.assert_allclose(0.99, reported, rtol=1e-2)
|
||||
|
||||
def test_pr_auc_binary(self):
|
||||
self.run_pr_auc_binary("hist")
|
||||
|
||||
def run_pr_auc_multi(self, tree_method):
    """Check the multi-class ``aucpr`` eval metric for the given ``tree_method``.

    Trains on a 3-class dataset and expects the value reported after
    10 rounds to be close to 1.0.
    """
    from sklearn.datasets import make_classification

    X, y = make_classification(
        64, 16, n_informative=8, n_classes=3, random_state=1994
    )

    def last_aucpr(n_estimators):
        # Train on (X, y), evaluate on the same data, and return the last
        # recorded aucpr value.
        model = xgb.XGBClassifier(tree_method=tree_method, n_estimators=n_estimators)
        model.fit(X, y, eval_metric="aucpr", eval_set=[(X, y)])
        return model.evals_result()["validation_0"]["aucpr"][-1]

    # The single-round value is fetched but not checked.
    reported = last_aucpr(1)

    # There is no reference implementation to compare against, so only
    # verify that XGBoost converges to 1.0.
    reported = last_aucpr(10)
    np.testing.assert_allclose(1.0, reported, rtol=1e-2)
|
||||
|
||||
def test_pr_auc_multi(self):
|
||||
self.run_pr_auc_multi("hist")
|
||||
|
||||
def run_pr_auc_ltr(self, tree_method):
    """Check the ``aucpr`` eval metric with ``XGBRanker`` for ``tree_method``.

    Fits a 16-round ranker on grouped binary data, evaluates on the same
    data and groups, and requires the final ``aucpr`` to be at least 0.99.
    """
    from sklearn.datasets import make_classification

    X, y = make_classification(128, 4, n_classes=2, random_state=1994)
    # Passed as both `group` and `eval_group`; the three entries sum to the
    # 128 generated rows.
    group_sizes = np.array([32, 32, 64])

    ranker = xgb.XGBRanker(tree_method=tree_method, n_estimators=16)
    ranker.fit(
        X,
        y,
        group=group_sizes,
        eval_set=[(X, y)],
        eval_group=[group_sizes],
        eval_metric="aucpr",
    )

    history = ranker.evals_result()["validation_0"]["aucpr"]
    assert history[-1] >= 0.99
|
||||
|
||||
def test_pr_auc_ltr(self):
|
||||
self.run_pr_auc_ltr("hist")
|
||||
|
||||
@@ -587,7 +587,7 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
|
||||
cls = xgb.dask.DaskXGBClassifier(
|
||||
tree_method=tree_method, n_estimators=2, use_label_encoder=False
|
||||
)
|
||||
cls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])
|
||||
cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])
|
||||
|
||||
# multiclass
|
||||
X_, y_ = make_classification(
|
||||
@@ -618,7 +618,7 @@ def run_empty_dmatrix_auc(client: "Client", tree_method: str, n_workers: int) ->
|
||||
cls = xgb.dask.DaskXGBClassifier(
|
||||
tree_method=tree_method, n_estimators=2, use_label_encoder=False
|
||||
)
|
||||
cls.fit(X, y, eval_metric="auc", eval_set=[(valid_X, valid_y)])
|
||||
cls.fit(X, y, eval_metric=["auc", "aucpr"], eval_set=[(valid_X, valid_y)])
|
||||
|
||||
|
||||
def test_empty_dmatrix_auc() -> None:
|
||||
|
||||
Reference in New Issue
Block a user