# pylint: disable=too-many-locals
"""Tests for learning to rank."""
from types import ModuleType
from typing import Any

import numpy as np
import pytest

import xgboost as xgb
from xgboost import testing as tm


def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None:
    """Test ranking with qid packed into X."""
    import scipy.sparse
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import StratifiedGroupKFold, cross_val_score

    X, y, q, _ = tm.make_ltr(n_samples=128, n_features=2, n_query_groups=8, max_rel=3)

    # pack qid into X using a dataframe
    df = impl.DataFrame(X)
    df["qid"] = q

    ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg", tree_method=tree_method)
    ranker.fit(df, y)
    s = ranker.score(df, y)
    assert s > 0.7

    # works with validation datasets as well
    valid_df = df.copy()
    valid_df.iloc[0, 0] = 3.0
    ranker.fit(df, y, eval_set=[(valid_df, y)])

    # same as passing qid directly
    ranker = xgb.XGBRanker(n_estimators=3, eval_metric="ndcg", tree_method=tree_method)
    ranker.fit(X, y, qid=q)
    s1 = ranker.score(df, y)
    assert np.isclose(s, s1)

    # Works with standard sklearn cv
    if tree_method != "gpu_hist":
        # we need cuML for this.
        kfold = StratifiedGroupKFold(shuffle=False)
        results = cross_val_score(ranker, df, y, cv=kfold, groups=df.qid)
        assert len(results) == 5

    # Works with custom metric
    def neg_mse(*args: Any, **kwargs: Any) -> float:
        return -float(mean_squared_error(*args, **kwargs))

    ranker = xgb.XGBRanker(
        n_estimators=3,
        eval_metric=neg_mse,
        tree_method=tree_method,
        disable_default_eval_metric=True,
    )
    ranker.fit(df, y, eval_set=[(valid_df, y)])
    score = ranker.score(valid_df, y)
    assert np.isclose(score, ranker.evals_result()["validation_0"]["neg_mse"][-1])

    # Works with sparse data
    if tree_method != "gpu_hist":
        # no sparse with cuDF
        X_csr = scipy.sparse.csr_matrix(X)
        df = impl.DataFrame.sparse.from_spmatrix(
            X_csr, columns=[str(i) for i in range(X.shape[1])]
        )
        df["qid"] = q
        ranker = xgb.XGBRanker(
            n_estimators=3, eval_metric="ndcg", tree_method=tree_method
        )
        ranker.fit(df, y)
        s2 = ranker.score(df, y)
        assert np.isclose(s2, s)

    with pytest.raises(ValueError, match="Either `group` or `qid`."):
        ranker.fit(df, y, eval_set=[(X, y)])


def run_ranking_categorical(device: str) -> None:
    """Test LTR with categorical features."""
    from sklearn.model_selection import cross_val_score

    X, y = tm.make_categorical(
        n_samples=512, n_features=10, n_categories=3, onehot=False
    )
    rng = np.random.default_rng(1994)
    qid = rng.choice(3, size=y.shape[0])
    qid = np.sort(qid)
    X["qid"] = qid

    ltr = xgb.XGBRanker(enable_categorical=True, device=device)
    ltr.fit(X, y)
    score = ltr.score(X, y)
    assert score > 0.9

    ltr = xgb.XGBRanker(enable_categorical=True, device=device)
    # test using the score function inside sklearn.
    scores = cross_val_score(ltr, X, y)
    for s in scores:
        assert s > 0.7


def run_normalization(device: str) -> None:
    """Test normalization."""
    X, y, qid, _ = tm.make_ltr(2048, 4, 64, 3)
    ltr = xgb.XGBRanker(objective="rank:pairwise", n_estimators=4, device=device)
    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
    e0 = ltr.evals_result()

    ltr = xgb.XGBRanker(
        objective="rank:pairwise",
        n_estimators=4,
        device=device,
        lambdarank_normalization=False,
    )
    ltr.fit(X, y, qid=qid, eval_set=[(X, y)], eval_qid=[qid])
    e1 = ltr.evals_result()
    assert e1["validation_0"]["ndcg@32"][-1] > e0["validation_0"]["ndcg@32"][-1]
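

# Minimal local driver, included as an illustrative sketch rather than part of
# the upstream test suite: it exercises the helpers above with pandas as the
# DataFrame implementation and the CPU "hist" tree method.  It assumes pandas,
# scipy, and scikit-learn are installed; the GPU paths would instead receive a
# cuDF module as `impl` and device="cuda".
if __name__ == "__main__":
    import pandas as pd

    run_ranking_qid_df(pd, "hist")  # qid packed into a pandas DataFrame
    run_ranking_categorical("cpu")  # categorical features via enable_categorical
    run_normalization("cpu")        # effect of lambdarank_normalization=False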