Support sklearn cross validation for ranker. (#8859)
* Support sklearn cross validation for ranker. - Add a convention for `X` to include a special `qid` column. sklearn utilities consider only `X`, `y`, and `sample_weight` for supervised learning algorithms, but ranking needs an additional `qid` array. Supporting sklearn's cross-validation function is important because all other tuning functions, such as grid search, are built on cross validation.
This commit is contained in:
@@ -8,6 +8,7 @@ import pytest
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.ranking import run_ranking_qid_df
|
||||
|
||||
sys.path.append("tests/python")
|
||||
import test_with_sklearn as twskl # noqa
|
||||
@@ -153,3 +154,10 @@ def test_classififer():
|
||||
y *= 10
|
||||
with pytest.raises(ValueError, match=r"Invalid classes.*"):
|
||||
clf.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_qid_df():
    """Run the shared ranker ``qid``-column check with cuDF and ``gpu_hist``."""
    # The decorator only guards on pandas; skip, rather than fail with an
    # ImportError, when cuDF itself is unavailable.
    cudf = pytest.importorskip("cudf")

    run_ranking_qid_df(cudf, "gpu_hist")
|
||||
|
||||
@@ -11,6 +11,7 @@ from sklearn.utils.estimator_checks import parametrize_with_checks
|
||||
|
||||
import xgboost as xgb
|
||||
from xgboost import testing as tm
|
||||
from xgboost.testing.ranking import run_ranking_qid_df
|
||||
from xgboost.testing.shared import get_feature_weights, validate_data_initialization
|
||||
from xgboost.testing.updater import get_basescore
|
||||
|
||||
@@ -180,6 +181,13 @@ def test_ranking_metric() -> None:
|
||||
assert results["validation_0"]["roc_auc_score"][-1] > 0.6
|
||||
|
||||
|
||||
@pytest.mark.skipif(**tm.no_pandas())
def test_ranking_qid_df():
    """Run the shared ranker ``qid``-column check with pandas and ``hist``."""
    # Imported lazily so the module still loads when pandas is missing and the
    # skip marker above can take effect.
    import pandas

    run_ranking_qid_df(pandas, "hist")
|
||||
|
||||
|
||||
def test_stacking_regression():
|
||||
from sklearn.datasets import load_diabetes
|
||||
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
|
||||
|
||||
Reference in New Issue
Block a user