Support sklearn cross validation for ranker. (#8859)

* Support sklearn cross validation for ranker. Add a convention for `X` to include a special `qid` column: sklearn utilities consider only `X`, `y`, and `sample_weight` for supervised learning algorithms, but ranking needs an additional `qid` array. Supporting the cross validation function in sklearn is important because all other tuning functions, such as grid search, are built on cross validation.
2023-03-07 00:22:08 +08:00
parent cad7401783
commit 7eba285a1e
8 changed files with 232 additions and 43 deletions
--- a/tests/python-gpu/test_gpu_with_sklearn.py
+++ b/tests/python-gpu/test_gpu_with_sklearn.py
@@ -8,6 +8,7 @@ import pytest

 import xgboost as xgb
 from xgboost import testing as tm
+from xgboost.testing.ranking import run_ranking_qid_df

 sys.path.append("tests/python")
 import test_with_sklearn as twskl  # noqa
@@ -153,3 +154,10 @@ def test_classififer():
    y *= 10
    with pytest.raises(ValueError, match=r"Invalid classes.*"):
        clf.fit(X, y)
+
+
+@pytest.mark.skipif(**tm.no_pandas())
+def test_ranking_qid_df():
+    # The pandas guard above does not cover cudf; skip (not error) when it is missing.
+    cudf = pytest.importorskip("cudf")
+    run_ranking_qid_df(cudf, "gpu_hist")