Fix using categorical data with the score function of ranker. (#9753)
parent 82828621d0
commit c3a0622b49
@@ -2099,7 +2099,17 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
         """
         X, qid = _get_qid(X, None)
-        Xyq = DMatrix(X, y, qid=qid)
+        # fixme(jiamingy): base margin and group weight is not yet supported. We might
+        # need to make extra special fields in the dataframe.
+        Xyq = DMatrix(
+            X,
+            y,
+            qid=qid,
+            missing=self.missing,
+            enable_categorical=self.enable_categorical,
+            nthread=self.n_jobs,
+            feature_types=self.feature_types,
+        )
         if callable(self.eval_metric):
             metric = ltr_metric_decorator(self.eval_metric, self.n_jobs)
             result_str = self.get_booster().eval_set([(Xyq, "eval")], feval=metric)
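For context, a minimal usage sketch of the path this hunk fixes (the column names and toy data below are illustrative assumptions, not part of the commit): with enable_categorical=True, XGBRanker.score now constructs its internal DMatrix with the same missing/categorical/thread/feature-type settings used for training, so scoring a DataFrame that carries category columns and a qid column works.

import numpy as np
import pandas as pd
import xgboost as xgb

# Illustrative toy data: one categorical feature plus the special qid column.
rng = np.random.default_rng(0)
X = pd.DataFrame(
    {
        "f0": pd.Categorical(rng.choice(["a", "b", "c"], size=64)),
        "qid": np.sort(rng.choice(4, size=64)),
    }
)
y = rng.integers(0, 5, size=64)

ranker = xgb.XGBRanker(enable_categorical=True)
ranker.fit(X, y)  # the query id is taken from the "qid" column
print(ranker.score(X, y))  # exercises the DMatrix construction patched above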
@@ -75,3 +75,28 @@ def run_ranking_qid_df(impl: ModuleType, tree_method: str) -> None:

     with pytest.raises(ValueError, match="Either `group` or `qid`."):
         ranker.fit(df, y, eval_set=[(X, y)])
+
+
+def run_ranking_categorical(device: str) -> None:
+    """Test LTR with categorical features."""
+    from sklearn.model_selection import cross_val_score
+
+    X, y = tm.make_categorical(
+        n_samples=512, n_features=10, n_categories=3, onehot=False
+    )
+    rng = np.random.default_rng(1994)
+    qid = rng.choice(3, size=y.shape[0])
+    qid = np.sort(qid)
+    X["qid"] = qid
+
+    ltr = xgb.XGBRanker(enable_categorical=True, device=device)
+    ltr.fit(X, y)
+    score = ltr.score(X, y)
+    assert score > 0.9
+
+    ltr = xgb.XGBRanker(enable_categorical=True, device=device)
+
+    # test using the score function inside sklearn.
+    scores = cross_val_score(ltr, X, y)
+    for s in scores:
+        assert s > 0.7
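A hedged aside on the pattern the new test relies on: because the query id travels inside X as an ordinary qid column, generic scikit-learn utilities such as cross_val_score can call XGBRanker.score without any ranking-specific plumbing. The sketch below is an illustration under stated assumptions (the data generation and the GroupKFold choice are not part of the commit); it shows how a group-aware splitter can be supplied so that rows from one query stay inside a single fold.

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import GroupKFold, cross_val_score

# Illustrative data: two categorical features and a qid column, sorted by qid.
rng = np.random.default_rng(1994)
n = 256
qid = np.sort(rng.choice(8, size=n))
X = pd.DataFrame(
    {
        "f0": pd.Categorical(rng.choice(["x", "y", "z"], size=n)),
        "f1": pd.Categorical(rng.choice(["u", "v"], size=n)),
        "qid": qid,
    }
)
y = rng.integers(0, 4, size=n)

ltr = xgb.XGBRanker(enable_categorical=True)
# GroupKFold keeps all rows of a query in the same fold; scoring goes through
# XGBRanker.score, the method patched in this commit.
scores = cross_val_score(ltr, X, y, groups=qid, cv=GroupKFold(n_splits=3))
print(scores)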
@@ -10,7 +10,7 @@ import pytest

 import xgboost as xgb
 from xgboost import testing as tm
-from xgboost.testing.ranking import run_ranking_qid_df
+from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df

 sys.path.append("tests/python")
 import test_with_sklearn as twskl  # noqa
@@ -256,6 +256,11 @@ def test_ranking_qid_df():
     run_ranking_qid_df(cudf, "gpu_hist")


+@pytest.mark.skipif(**tm.no_pandas())
+def test_ranking_categorical() -> None:
+    run_ranking_categorical(device="cuda")
+
+
 @pytest.mark.skipif(**tm.no_cupy())
 @pytest.mark.mgpu
 def test_device_ordinal() -> None:
@@ -12,7 +12,7 @@ from sklearn.utils.estimator_checks import parametrize_with_checks

 import xgboost as xgb
 from xgboost import testing as tm
-from xgboost.testing.ranking import run_ranking_qid_df
+from xgboost.testing.ranking import run_ranking_categorical, run_ranking_qid_df
 from xgboost.testing.shared import get_feature_weights, validate_data_initialization
 from xgboost.testing.updater import get_basescore
@@ -173,6 +173,11 @@ def test_ranking():
     np.testing.assert_almost_equal(pred, pred_orig)


+@pytest.mark.skipif(**tm.no_pandas())
+def test_ranking_categorical() -> None:
+    run_ranking_categorical(device="cpu")
+
+
 def test_ranking_metric() -> None:
     from sklearn.metrics import roc_auc_score