Rework the MAP metric. (#8931)
- The new implementation is stricter: only binary labels are accepted, whereas the previous implementation converted values greater than 1 to 1.
- Deterministic GPU results (no atomic add).
- Fix top-k handling.
- Use a precise definition of MAP (there are other variants of how to handle top-k).
- Refactor the GPU ranking tests.
This commit is contained in:
@@ -177,4 +177,36 @@ TEST(NDCGCache, InitFromCPU) {
|
||||
Context ctx;
|
||||
TestNDCGCache(&ctx);
|
||||
}
|
||||
|
||||
// Test body shared by the CPU and GPU cases: verifies that MAPCache rejects
// non-binary labels and, for valid binary labels, allocates per-row buffers.
void TestMAPCache(Context const* ctx) {
  auto dmat = EmptyDMatrix();
  MetaInfo& info = dmat->Info();
  LambdaRankParam param;
  param.UpdateAllowUnknown(Args{});

  // Labels 0..31 — multi-level relevance, which MAP no longer accepts.
  std::vector<float> labels(32);
  common::Iota(ctx, labels.begin(), labels.end(), 0.0f);
  info.labels.Reshape(labels.size());
  info.num_row_ = labels.size();
  info.labels.Data()->HostVector() = std::move(labels);

  auto make_cache = [&]() { std::make_shared<MAPCache>(ctx, info, param); };
  // binary label: construction must throw on non-binary input.
  ASSERT_THROW(make_cache(), dmlc::Error);

  // Replace with a valid binary label vector; construction should now succeed.
  labels = std::vector<float>(32, 0.0f);
  labels[1] = 1.0f;
  info.labels.Data()->HostVector() = labels;
  auto cache = std::make_shared<MAPCache>(ctx, info, param);

  // Both accumulator buffers are sized by the number of rows.
  ASSERT_EQ(cache->Acc(ctx).size(), info.num_row_);
  ASSERT_EQ(cache->NumRelevant(ctx).size(), info.num_row_);
}
|
||||
|
||||
// Run the shared MAPCache test body on a default (CPU) context.
TEST(MAPCache, InitFromCPU) {
  Context ctx;
  ctx.Init(Args{});
  TestMAPCache(&ctx);
}
|
||||
} // namespace xgboost::ltr
|
||||
|
||||
@@ -95,4 +95,10 @@ TEST(NDCGCache, InitFromGPU) {
|
||||
ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
|
||||
TestNDCGCache(&ctx);
|
||||
}
|
||||
|
||||
// Run the shared MAPCache test body on a GPU context (device 0).
TEST(MAPCache, InitFromGPU) {
  Context ctx;
  ctx.UpdateAllowUnknown(Args{{"gpu_id", "0"}});
  TestMAPCache(&ctx);
}
|
||||
} // namespace xgboost::ltr
|
||||
|
||||
@@ -6,4 +6,6 @@
|
||||
|
||||
namespace xgboost::ltr {
// Shared test driver for NDCGCache; invoked by both the CPU and GPU test cases.
void TestNDCGCache(Context const* ctx);

// Shared test driver for MAPCache; invoked by both the CPU and GPU test cases.
void TestMAPCache(Context const* ctx);
}  // namespace xgboost::ltr
|
||||
|
||||
@@ -141,7 +141,7 @@ TEST(Metric, DeclareUnifiedTest(MAP)) {
|
||||
// Rank metric with group info
|
||||
EXPECT_NEAR(GetMetricEval(metric,
|
||||
{0.1f, 0.9f, 0.2f, 0.8f, 0.4f, 1.7f},
|
||||
{2, 7, 1, 0, 5, 0}, // Labels
|
||||
{1, 1, 1, 0, 1, 0}, // Labels
|
||||
{}, // Weights
|
||||
{0, 2, 5, 6}), // Group info
|
||||
0.8611f, 0.001f);
|
||||
|
||||
@@ -1,194 +1,130 @@
|
||||
import itertools
|
||||
import os
|
||||
import shutil
|
||||
import urllib.request
|
||||
import zipfile
|
||||
from typing import Dict
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import xgboost
|
||||
from xgboost import testing as tm
|
||||
|
||||
pytestmark = tm.timeout(10)
|
||||
pytestmark = tm.timeout(30)
|
||||
|
||||
|
||||
class TestRanking:
|
||||
@classmethod
|
||||
def setup_class(cls):
|
||||
"""
|
||||
Download and setup the test fixtures
|
||||
"""
|
||||
from sklearn.datasets import load_svmlight_files
|
||||
def comp_training_with_rank_objective(
|
||||
dtrain: xgboost.DMatrix,
|
||||
dtest: xgboost.DMatrix,
|
||||
rank_objective: str,
|
||||
metric_name: str,
|
||||
tolerance: float = 1e-02,
|
||||
) -> None:
|
||||
"""Internal method that trains the dataset using the rank objective on GPU and CPU,
|
||||
evaluates the metric and determines if the delta between the metric is within the
|
||||
tolerance level.
|
||||
|
||||
# download the test data
|
||||
cls.dpath = os.path.join(tm.demo_dir(__file__), "rank/")
|
||||
src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
|
||||
target = os.path.join(cls.dpath, "MQ2008.zip")
|
||||
"""
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(dtest, "eval"), (dtrain, "train")]
|
||||
|
||||
if os.path.exists(cls.dpath) and os.path.exists(target):
|
||||
print("Skipping dataset download...")
|
||||
else:
|
||||
urllib.request.urlretrieve(url=src, filename=target)
|
||||
with zipfile.ZipFile(target, 'r') as f:
|
||||
f.extractall(path=cls.dpath)
|
||||
params = {
|
||||
"booster": "gbtree",
|
||||
"tree_method": "gpu_hist",
|
||||
"gpu_id": 0,
|
||||
"predictor": "gpu_predictor",
|
||||
}
|
||||
|
||||
(x_train, y_train, qid_train, x_test, y_test, qid_test,
|
||||
x_valid, y_valid, qid_valid) = load_svmlight_files(
|
||||
(cls.dpath + "MQ2008/Fold1/train.txt",
|
||||
cls.dpath + "MQ2008/Fold1/test.txt",
|
||||
cls.dpath + "MQ2008/Fold1/vali.txt"),
|
||||
query_id=True, zero_based=False)
|
||||
# instantiate the matrices
|
||||
cls.dtrain = xgboost.DMatrix(x_train, y_train)
|
||||
cls.dvalid = xgboost.DMatrix(x_valid, y_valid)
|
||||
cls.dtest = xgboost.DMatrix(x_test, y_test)
|
||||
# set the group counts from the query IDs
|
||||
cls.dtrain.set_group([len(list(items))
|
||||
for _key, items in itertools.groupby(qid_train)])
|
||||
cls.dtest.set_group([len(list(items))
|
||||
for _key, items in itertools.groupby(qid_test)])
|
||||
cls.dvalid.set_group([len(list(items))
|
||||
for _key, items in itertools.groupby(qid_valid)])
|
||||
# save the query IDs for testing
|
||||
cls.qid_train = qid_train
|
||||
cls.qid_test = qid_test
|
||||
cls.qid_valid = qid_valid
|
||||
num_trees = 100
|
||||
check_metric_improvement_rounds = 10
|
||||
|
||||
def setup_weighted(x, y, groups):
|
||||
# Setup weighted data
|
||||
data = xgboost.DMatrix(x, y)
|
||||
groups_segment = [len(list(items))
|
||||
for _key, items in itertools.groupby(groups)]
|
||||
data.set_group(groups_segment)
|
||||
n_groups = len(groups_segment)
|
||||
weights = np.ones((n_groups,))
|
||||
data.set_weight(weights)
|
||||
return data
|
||||
evals_result: Dict[str, Dict] = {}
|
||||
params["objective"] = rank_objective
|
||||
params["eval_metric"] = metric_name
|
||||
bst = xgboost.train(
|
||||
params,
|
||||
dtrain,
|
||||
num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist,
|
||||
evals_result=evals_result,
|
||||
)
|
||||
gpu_scores = evals_result["train"][metric_name][-1]
|
||||
|
||||
cls.dtrain_w = setup_weighted(x_train, y_train, qid_train)
|
||||
cls.dtest_w = setup_weighted(x_test, y_test, qid_test)
|
||||
cls.dvalid_w = setup_weighted(x_valid, y_valid, qid_valid)
|
||||
evals_result = {}
|
||||
|
||||
# model training parameters
|
||||
cls.params = {'booster': 'gbtree',
|
||||
'tree_method': 'gpu_hist',
|
||||
'gpu_id': 0,
|
||||
'predictor': 'gpu_predictor'}
|
||||
cls.cpu_params = {'booster': 'gbtree',
|
||||
'tree_method': 'hist',
|
||||
'gpu_id': -1,
|
||||
'predictor': 'cpu_predictor'}
|
||||
cpu_params = {
|
||||
"booster": "gbtree",
|
||||
"tree_method": "hist",
|
||||
"gpu_id": -1,
|
||||
"predictor": "cpu_predictor",
|
||||
}
|
||||
cpu_params["objective"] = rank_objective
|
||||
cpu_params["eval_metric"] = metric_name
|
||||
bstc = xgboost.train(
|
||||
cpu_params,
|
||||
dtrain,
|
||||
num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist,
|
||||
evals_result=evals_result,
|
||||
)
|
||||
cpu_scores = evals_result["train"][metric_name][-1]
|
||||
|
||||
@classmethod
def teardown_class(cls):
    """Clean up test artifacts: remove the downloaded ``MQ2008.zip`` archive
    and the directory it was extracted into under ``cls.dpath``."""
    os.remove(os.path.join(cls.dpath, "MQ2008.zip"))
    shutil.rmtree(os.path.join(cls.dpath, "MQ2008"))
|
||||
info = (rank_objective, metric_name)
|
||||
assert np.allclose(gpu_scores, cpu_scores, tolerance, tolerance), info
|
||||
assert np.allclose(bst.best_score, bstc.best_score, tolerance, tolerance), info
|
||||
|
||||
@classmethod
|
||||
def __test_training_with_rank_objective(cls, rank_objective, metric_name, tolerance=1e-02):
|
||||
"""
|
||||
Internal method that trains the dataset using the rank objective on GPU and CPU, evaluates
|
||||
the metric and determines if the delta between the metric is within the tolerance level
|
||||
:return:
|
||||
"""
|
||||
# specify validations set to watch performance
|
||||
watchlist = [(cls.dtest, 'eval'), (cls.dtrain, 'train')]
|
||||
evals_result_weighted: Dict[str, Dict] = {}
|
||||
dtest.set_weight(np.ones((dtest.get_group().size,)))
|
||||
dtrain.set_weight(np.ones((dtrain.get_group().size,)))
|
||||
watchlist = [(dtest, "eval"), (dtrain, "train")]
|
||||
bst_w = xgboost.train(
|
||||
params,
|
||||
dtrain,
|
||||
num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist,
|
||||
evals_result=evals_result_weighted,
|
||||
)
|
||||
weighted_metric = evals_result_weighted["train"][metric_name][-1]
|
||||
|
||||
num_trees = 100
|
||||
check_metric_improvement_rounds = 10
|
||||
tolerance = 1e-5
|
||||
assert np.allclose(bst_w.best_score, bst.best_score, tolerance, tolerance)
|
||||
assert np.allclose(weighted_metric, gpu_scores, tolerance, tolerance)
|
||||
|
||||
evals_result = {}
|
||||
cls.params['objective'] = rank_objective
|
||||
cls.params['eval_metric'] = metric_name
|
||||
bst = xgboost.train(
|
||||
cls.params, cls.dtrain, num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist, evals_result=evals_result)
|
||||
gpu_map_metric = evals_result['train'][metric_name][-1]
|
||||
|
||||
evals_result = {}
|
||||
cls.cpu_params['objective'] = rank_objective
|
||||
cls.cpu_params['eval_metric'] = metric_name
|
||||
bstc = xgboost.train(
|
||||
cls.cpu_params, cls.dtrain, num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist, evals_result=evals_result)
|
||||
cpu_map_metric = evals_result['train'][metric_name][-1]
|
||||
@pytest.mark.parametrize(
|
||||
"objective,metric",
|
||||
[
|
||||
("rank:pairwise", "auc"),
|
||||
("rank:pairwise", "ndcg"),
|
||||
("rank:pairwise", "map"),
|
||||
("rank:ndcg", "auc"),
|
||||
("rank:ndcg", "ndcg"),
|
||||
("rank:ndcg", "map"),
|
||||
("rank:map", "auc"),
|
||||
("rank:map", "ndcg"),
|
||||
("rank:map", "map"),
|
||||
],
|
||||
)
|
||||
def test_with_mq2008(objective, metric) -> None:
|
||||
(
|
||||
x_train,
|
||||
y_train,
|
||||
qid_train,
|
||||
x_test,
|
||||
y_test,
|
||||
qid_test,
|
||||
x_valid,
|
||||
y_valid,
|
||||
qid_valid,
|
||||
) = tm.get_mq2008(os.path.join(os.path.join(tm.demo_dir(__file__), "rank")))
|
||||
|
||||
assert np.allclose(gpu_map_metric, cpu_map_metric, tolerance,
|
||||
tolerance)
|
||||
assert np.allclose(bst.best_score, bstc.best_score, tolerance,
|
||||
tolerance)
|
||||
if metric.find("map") != -1 or objective.find("map") != -1:
|
||||
y_train[y_train <= 1] = 0.0
|
||||
y_train[y_train > 1] = 1.0
|
||||
y_test[y_test <= 1] = 0.0
|
||||
y_test[y_test > 1] = 1.0
|
||||
|
||||
evals_result_weighted = {}
|
||||
watchlist = [(cls.dtest_w, 'eval'), (cls.dtrain_w, 'train')]
|
||||
bst_w = xgboost.train(
|
||||
cls.params, cls.dtrain_w, num_boost_round=num_trees,
|
||||
early_stopping_rounds=check_metric_improvement_rounds,
|
||||
evals=watchlist, evals_result=evals_result_weighted)
|
||||
weighted_metric = evals_result_weighted['train'][metric_name][-1]
|
||||
# GPU Ranking is not deterministic due to `AtomicAddGpair`,
|
||||
# remove tolerance once the issue is resolved.
|
||||
# https://github.com/dmlc/xgboost/issues/5561
|
||||
assert np.allclose(bst_w.best_score, bst.best_score,
|
||||
tolerance, tolerance)
|
||||
assert np.allclose(weighted_metric, gpu_map_metric,
|
||||
tolerance, tolerance)
|
||||
dtrain = xgboost.DMatrix(x_train, y_train, qid=qid_train)
|
||||
dtest = xgboost.DMatrix(x_test, y_test, qid=qid_test)
|
||||
|
||||
def test_training_rank_pairwise_map_metric(self):
    """Train with the ``rank:pairwise`` objective and compare the ``map``
    metric between the GPU and CPU runs."""
    self.__test_training_with_rank_objective('rank:pairwise', 'map')

def test_training_rank_pairwise_auc_metric(self):
    """Train with the ``rank:pairwise`` objective and compare the ``auc``
    metric between the GPU and CPU runs."""
    self.__test_training_with_rank_objective('rank:pairwise', 'auc')

def test_training_rank_pairwise_ndcg_metric(self):
    """Train with the ``rank:pairwise`` objective and compare the ``ndcg``
    metric between the GPU and CPU runs."""
    self.__test_training_with_rank_objective('rank:pairwise', 'ndcg')

def test_training_rank_ndcg_map(self):
    """Train with the ``rank:ndcg`` objective and compare the ``map``
    metric between the GPU and CPU runs."""
    self.__test_training_with_rank_objective('rank:ndcg', 'map')

def test_training_rank_ndcg_auc(self):
    """Train with the ``rank:ndcg`` objective and compare the ``auc``
    metric between the GPU and CPU runs."""
    self.__test_training_with_rank_objective('rank:ndcg', 'auc')

def test_training_rank_ndcg_ndcg(self):
    """Train with the ``rank:ndcg`` objective and compare the ``ndcg``
    metric between the GPU and CPU runs."""
    self.__test_training_with_rank_objective('rank:ndcg', 'ndcg')

def test_training_rank_map_map(self):
    """Train with the ``rank:map`` objective and compare the ``map``
    metric between the GPU and CPU runs."""
    self.__test_training_with_rank_objective('rank:map', 'map')

def test_training_rank_map_auc(self):
    """Train with the ``rank:map`` objective and compare the ``auc``
    metric between the GPU and CPU runs."""
    self.__test_training_with_rank_objective('rank:map', 'auc')

def test_training_rank_map_ndcg(self):
    """Train with the ``rank:map`` objective and compare the ``ndcg``
    metric between the GPU and CPU runs."""
    self.__test_training_with_rank_objective('rank:map', 'ndcg')
|
||||
comp_training_with_rank_objective(dtrain, dtest, objective, metric)
|
||||
|
||||
@@ -128,12 +128,23 @@ def test_ranking():
|
||||
|
||||
x_test = np.random.rand(100, 10)
|
||||
|
||||
params = {'tree_method': 'exact', 'objective': 'rank:pairwise',
|
||||
'learning_rate': 0.1, 'gamma': 1.0, 'min_child_weight': 0.1,
|
||||
'max_depth': 6, 'n_estimators': 4}
|
||||
params = {
|
||||
"tree_method": "exact",
|
||||
"learning_rate": 0.1,
|
||||
"gamma": 1.0,
|
||||
"min_child_weight": 0.1,
|
||||
"max_depth": 6,
|
||||
"eval_metric": "ndcg",
|
||||
"n_estimators": 4,
|
||||
}
|
||||
model = xgb.sklearn.XGBRanker(**params)
|
||||
model.fit(x_train, y_train, group=train_group,
|
||||
eval_set=[(x_valid, y_valid)], eval_group=[valid_group])
|
||||
model.fit(
|
||||
x_train,
|
||||
y_train,
|
||||
group=train_group,
|
||||
eval_set=[(x_valid, y_valid)],
|
||||
eval_group=[valid_group],
|
||||
)
|
||||
assert model.evals_result()
|
||||
|
||||
pred = model.predict(x_test)
|
||||
@@ -145,11 +156,18 @@ def test_ranking():
|
||||
assert train_data.get_label().shape[0] == x_train.shape[0]
|
||||
valid_data.set_group(valid_group)
|
||||
|
||||
params_orig = {'tree_method': 'exact', 'objective': 'rank:pairwise',
|
||||
'eta': 0.1, 'gamma': 1.0,
|
||||
'min_child_weight': 0.1, 'max_depth': 6}
|
||||
xgb_model_orig = xgb.train(params_orig, train_data, num_boost_round=4,
|
||||
evals=[(valid_data, 'validation')])
|
||||
params_orig = {
|
||||
"tree_method": "exact",
|
||||
"objective": "rank:pairwise",
|
||||
"eta": 0.1,
|
||||
"gamma": 1.0,
|
||||
"min_child_weight": 0.1,
|
||||
"max_depth": 6,
|
||||
"eval_metric": "ndcg",
|
||||
}
|
||||
xgb_model_orig = xgb.train(
|
||||
params_orig, train_data, num_boost_round=4, evals=[(valid_data, "validation")]
|
||||
)
|
||||
pred_orig = xgb_model_orig.predict(test_data)
|
||||
|
||||
np.testing.assert_almost_equal(pred, pred_orig)
|
||||
@@ -165,7 +183,11 @@ def test_ranking_metric() -> None:
|
||||
# sklearn compares the number of mis-classified docs, while the one in xgboost
|
||||
# compares the number of mis-classified pairs.
|
||||
ltr = xgb.XGBRanker(
|
||||
eval_metric=roc_auc_score, n_estimators=10, tree_method="hist", max_depth=2
|
||||
eval_metric=roc_auc_score,
|
||||
n_estimators=10,
|
||||
tree_method="hist",
|
||||
max_depth=2,
|
||||
objective="rank:pairwise",
|
||||
)
|
||||
ltr.fit(
|
||||
X,
|
||||
|
||||
Reference in New Issue
Block a user