121 lines
3.8 KiB
Python

# pylint: disable=too-many-locals
"""Tests for learning to rank."""
from types import ModuleType
from typing import Any
import numpy as np
import pytest
import xgboost as xgb
from xgboost import testing as tm
def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None:
"""Test ranking with qid packed into X."""
import scipy.sparse
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedGroupKFold, cross_val_score
X, y, q, _ = tm.make_ltr(n_samples=128, n_features=2, n_query_groups=8, max_rel=3)
# pack qid into x using dataframe
df = impl.DataFrame(X)
df["qid"] = q
ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg", tree_method=tree_method)
ranker.fit(df, y)
s = ranker.score(df, y)
assert s > 0.7
# works with validation datasets as well
valid_df = df.copy()
valid_df.iloc[0, 0] = 3.0
ranker.fit(df, y, eval_set=[(valid_df, y)])
# same as passing qid directly
ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg", tree_method=tree_method)
ranker.fit(X, y, qid=q)
s1 = ranker.score(df, y)
assert np.isclose(s, s1)
# Works with standard sklearn cv
if tree_method != "gpu_hist":
# we need cuML for this.
kfold = StratifiedGroupKFold(shuffle=False)
results = cross_val_score(ranker, df, y, cv=kfold, groups=df.qid)
assert len(results) == 5
# Works with custom metric
def neg_mse(*args: Any, **kwargs: Any) -> float:
return -float(mean_squared_error(*args, **kwargs))
ranker = xgb.XGBRanker(
n_estimators=3,
eval_metric=neg_mse,
tree_method=tree_method,
disable_default_eval_metric=True,
)
ranker.fit(df, y, eval_set=[(valid_df, y)])
score = ranker.score(valid_df, y)
assert np.isclose(score, ranker.evals_result()["validation_0"]["neg_mse"][-1])
# Works with sparse data
if tree_method != "gpu_hist":
# no sparse with cuDF
X_csr = scipy.sparse.csr_matrix(X)
df = impl.DataFrame.sparse.from_spmatrix(
X_csr, columns=[str(i) for i in range(X.shape[1])]
)
df["qid"] = q
ranker = xgb.XGBRanker(
n_estimators=3, eval_metric="ndcg", tree_method=tree_method
)
ranker.fit(df, y)
s2 = ranker.score(df, y)
assert np.isclose(s2, s)
with pytest.raises(ValueError, match="Either `group` or `qid`."):
ranker.fit(df, y, eval_set=[(X, y)])
def run_ranking_categorical(device: str) -> None:
"""Test LTR with categorical features."""
from sklearn.model_selection import cross_val_score
X, y = tm.make_categorical(
n_samples=512, n_features=10, n_categories=3, onehot=False
)
rng = np.random.default_rng(1994)
qid = rng.choice(3, size=y.shape[0])
qid = np.sort(qid)
X["qid"] = qid
ltr = xgb.XGBRanker(enable_categorical=True, device=device)
ltr.fit(X, y)
score = ltr.score(X, y)
assert score > 0.9
ltr = xgb.XGBRanker(enable_categorical=True, device=device)
# test using the score function inside sklearn.
scores = cross_val_score(ltr, X, y)
for s in scores:
assert s > 0.7
def run_normalization(device: str) -> None:
"""Test normalization."""
X, y, qid, _ = tm.make_ltr(2048, 4, 64, 3)
ltr = xgb.XGBRanker(objective="rank:pairwise", n_estimators=4, device=device)
ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
e0 = ltr.evals_result()
ltr = xgb.XGBRanker(
objective="rank:pairwise",
n_estimators=4,
device=device,
lambdarank_normalization=False,
)
ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
e1 = ltr.evals_result()
assert e1["validation_0"]["ndcg@32"][-1] > e0["validation_0"]["ndcg@32"][-1]