BUG: XGBClassifier.feature_importances_ raises ValueError if the input is a pandas DataFrame

This commit is contained in:
sinhrks 2016-04-24 14:34:09 +09:00
parent 4149854633
commit c55cc809e5
2 changed files with 45 additions and 6 deletions

View File

@ -495,12 +495,19 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
feature_importances_ : array of shape = [n_features]
"""
fs = self.booster().get_fscore()
b = self.booster()
fs = b.get_fscore()
if b.feature_names is None:
keys = [int(k.replace('f', '')) for k in fs.keys()]
fs_dict = dict(zip(keys, fs.values()))
all_features_dict = dict.fromkeys(range(0, self._features_count), 0)
fs_dict = dict(zip(keys, fs.values()))
all_features_dict.update(fs_dict)
all_features = np.fromiter(all_features_dict.values(), np.float32)
all_features = np.fromiter(all_features_dict.values(),
dtype=np.float32)
else:
all_features = [fs.get(f, 0.) for f in b.feature_names]
all_features = np.array(all_features, dtype=np.float32)
return all_features / all_features.sum()

View File

@ -1,3 +1,4 @@
import random
import xgboost as xgb
import numpy as np
from sklearn.metrics import mean_squared_error
@ -48,6 +49,37 @@ def test_multiclass_classification():
check_pred(preds4, labels)
def test_feature_importances():
    """Check ``XGBClassifier.feature_importances_`` for array and DataFrame input.

    The data returned by ``load_digits(2)`` is fitted three times: as plain
    arrays, as a DataFrame with its default column labels, and as a
    DataFrame with string column labels.  Every fit must produce the same
    expected importances, in the original column order.
    """
    digits = load_digits(2)
    y = digits['target']
    X = digits['data']
    xgb_model = xgb.XGBClassifier(seed=0).fit(X, y)

    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0.,
                    0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0.,
                    0., 0., 0., 0.00833333, 0.25833333, 0., 0., 0., 0.,
                    0.03333334, 0.03333334, 0., 0.32499999, 0., 0., 0., 0.,
                    0.05, 0.06666667, 0., 0., 0., 0., 0., 0., 0., 0.04166667,
                    0., 0., 0., 0., 0., 0., 0., 0.00833333, 0., 0., 0., 0.,
                    0.], dtype=np.float32)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)

    # numeric columns
    # pandas is imported here, so it is only needed once this test runs.
    import pandas as pd
    y = pd.Series(digits['target'])
    X = pd.DataFrame(digits['data'])
    xgb_model = xgb.XGBClassifier(seed=0).fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)

    # string columns, the feature order must be kept
    chars = list('abcdefghijklmnopqrstuvwxyz')
    # A locally seeded generator keeps the labels identical on every run, and
    # the `seen` set guarantees they are unique: duplicate column labels would
    # be a different scenario from the one this test is about.
    rng = random.Random(0)
    seen = set()
    names = []
    while len(names) < X.shape[1]:
        name = "".join(rng.sample(chars, 5))
        if name not in seen:
            seen.add(name)
            names.append(name)
    X.columns = names
    xgb_model = xgb.XGBClassifier(seed=0).fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
def test_boston_housing_regression():
boston = load_boston()
y = boston['target']