Support sklearn cross validation for ranker. (#8859)

* Support sklearn cross validation for ranker.

- Add a convention for X to include a special `qid` column.

sklearn utilities consider only `X`, `y` and `sample_weight` for supervised learning
algorithms, but we need an additional qid array for ranking.

It's important to be able to support the cross validation function in sklearn since all
other tuning functions like grid search are based on cross validation.
This commit is contained in:
Jiaming Yuan
2023-03-07 00:22:08 +08:00
committed by GitHub
parent cad7401783
commit 7eba285a1e
8 changed files with 232 additions and 43 deletions

View File

@@ -11,6 +11,7 @@ from sklearn.utils.estimator_checks import parametrize_with_checks
import xgboost as xgb
from xgboost import testing as tm
from xgboost.testing.ranking import run_ranking_qid_df
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
from xgboost.testing.updater import get_basescore
@@ -180,6 +181,13 @@ def test_ranking_metric() -> None:
assert results["validation_0"]["roc_auc_score"][-1] > 0.6
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_qid_df():
    """Run the shared ranking ``qid`` data-frame check using pandas.

    The actual assertions live in ``run_ranking_qid_df``; this test only
    supplies the pandas module and the string ``"hist"`` as its arguments.
    """
    # Imported inside the test body rather than at module level; presumably
    # so that collecting this module does not require pandas (the skipif
    # condition comes from ``tm.no_pandas()``) -- confirm against xgboost.testing.
    import pandas

    run_ranking_qid_df(pandas, "hist")
def test_stacking_regression():
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor, StackingRegressor