Add a `feature_names_in_` property to XGBModel, with tests.

python-package/xgboost/sklearn.py: the new property returns the booster's
`feature_names` as a NumPy array, and raises AttributeError when the booster
reports no feature names (`feature_names is None`).

tests/python/test_with_sklearn.py: the hunk under
test_boston_housing_regression expects that AttributeError, and the hunk under
test_pandas_input compares the property against `feature_names`.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. The hunk line counts below agree with their
headers, but the indentation, the placement of blank context lines, and the
two-line wrapping of the `CalibratedClassifierCV(...)` call are reconstructed.
Confirm them against the target files before applying (or apply with
whitespace-tolerant options).

diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index a25890606..b2db142fa 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -1105,6 +1105,18 @@ class XGBModel(XGBModelBase):
         booster = self.get_booster()
         return booster.num_features()
 
+    @property
+    def feature_names_in_(self) -> np.ndarray:
+        """Names of features seen during :py:meth:`fit`. Defined only when `X` has feature
+        names that are all strings."""
+        feature_names = self.get_booster().feature_names
+        if feature_names is None:
+            raise AttributeError(
+                "`feature_names_in_` is defined only when `X` has feature names that "
+                "are all strings."
+            )
+        return np.array(feature_names)
+
     def _early_stopping_attr(self, attr: str) -> Union[float, int]:
         booster = self.get_booster()
         try:
diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py
index 83c73932b..aef4657ea 100644
--- a/tests/python/test_with_sklearn.py
+++ b/tests/python/test_with_sklearn.py
@@ -372,6 +372,9 @@ def test_boston_housing_regression():
         assert mean_squared_error(preds3, labels) < 25
         assert mean_squared_error(preds4, labels) < 350
 
+        with pytest.raises(AttributeError, match="feature_names_in_"):
+            xgb_model.feature_names_in_
+
 
 def run_boston_housing_rf_regression(tree_method):
     from sklearn.metrics import mean_squared_error
@@ -1017,6 +1020,8 @@ def test_pandas_input():
     train = df.drop(columns=['status'])
     model = xgb.XGBClassifier()
     model.fit(train, target)
+    np.testing.assert_equal(model.feature_names_in_, np.array(feature_names))
+
     clf_isotonic = CalibratedClassifierCV(model,
                                           cv='prefit', method='isotonic')
     clf_isotonic.fit(train, target)