BUG: XGBClassifier.feature_importances_ raises ValueError if input is pandas DataFrame
This commit is contained in:
parent
4149854633
commit
c55cc809e5
@ -495,12 +495,19 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
@property
def feature_importances_(self):
    """Normalized feature importance scores from the fitted booster.

    Returns
    -------
    feature_importances_ : array of shape = [n_features]
        Each feature's fscore (number of times it is used to split)
        divided by the total fscore, so the entries sum to 1.
    """
    b = self.booster()
    # get_fscore() maps feature name -> split count; features the booster
    # never used are absent and must default to 0.
    fs = b.get_fscore()
    if b.feature_names is None:
        # Unnamed features (plain numpy input): keys look like 'f12'.
        keys = [int(k.replace('f', '')) for k in fs.keys()]
        all_features_dict = dict.fromkeys(range(0, self._features_count), 0)
        fs_dict = dict(zip(keys, fs.values()))
        all_features_dict.update(fs_dict)
        all_features = np.fromiter(all_features_dict.values(),
                                   dtype=np.float32)
    else:
        # Named features (e.g. pandas DataFrame input): look scores up by
        # name in the booster's recorded column order. The previous code
        # assumed 'fNN'-style keys and raised ValueError on string names.
        all_features = [fs.get(f, 0.) for f in b.feature_names]
        all_features = np.array(all_features, dtype=np.float32)
    return all_features / all_features.sum()
|
||||
|
||||
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import random
|
||||
import xgboost as xgb
|
||||
import numpy as np
|
||||
from sklearn.metrics import mean_squared_error
|
||||
@ -48,6 +49,37 @@ def test_multiclass_classification():
|
||||
check_pred(preds4, labels)
|
||||
|
||||
|
||||
def test_feature_importances():
    """feature_importances_ must agree across numpy input, a DataFrame
    with numeric columns, and a DataFrame with string columns (feature
    order preserved)."""
    digits = load_digits(2)
    y = digits['target']
    X = digits['data']
    model = xgb.XGBClassifier(seed=0).fit(X, y)

    # Reference importances computed on the raw numpy arrays.
    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0.,
                    0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0.,
                    0., 0., 0., 0.00833333, 0.25833333, 0., 0., 0., 0.,
                    0.03333334, 0.03333334, 0., 0.32499999, 0., 0., 0., 0.,
                    0.05, 0.06666667, 0., 0., 0., 0., 0., 0., 0., 0.04166667,
                    0., 0., 0., 0., 0., 0., 0., 0.00833333, 0., 0., 0., 0.,
                    0.], dtype=np.float32)

    np.testing.assert_almost_equal(model.feature_importances_, exp)

    # numeric columns
    import pandas as pd
    y = pd.Series(digits['target'])
    X = pd.DataFrame(digits['data'])
    model = xgb.XGBClassifier(seed=0).fit(X, y)
    np.testing.assert_almost_equal(model.feature_importances_, exp)

    # string columns, the feature order must be kept
    alphabet = list('abcdefghijklmnopqrstuvwxyz')
    X.columns = ["".join(random.sample(alphabet, 5)) for _ in range(64)]

    model = xgb.XGBClassifier(seed=0).fit(X, y)
    np.testing.assert_almost_equal(model.feature_importances_, exp)
|
||||
|
||||
|
||||
def test_boston_housing_regression():
|
||||
boston = load_boston()
|
||||
y = boston['target']
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user