BUG FIX: XGBClassifier.feature_importances_ raised ValueError when the model was fit on a pandas DataFrame (string feature names cannot be parsed as 'fN' indices).
Commit: c55cc809e5 (parent: 4149854633)
@ -495,12 +495,19 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
|||||||
@property
def feature_importances_(self):
    """Normalized feature importances derived from the booster's fscore.

    Returns
    -------
    feature_importances_ : array of shape = [n_features]
        Importance of each feature, normalized so the values sum to 1.
        Features never used by any split get importance 0.
    """
    b = self.booster()
    fs = b.get_fscore()
    if b.feature_names is None:
        # Plain array input: fscore keys are synthetic names 'f0', 'f1', ...
        # Map them back to integer column positions.
        keys = [int(k.replace('f', '')) for k in fs.keys()]
        fs_dict = dict(zip(keys, fs.values()))
        # Start every feature at 0 so unused columns still appear.
        all_features_dict = dict.fromkeys(range(0, self._features_count), 0)
        all_features_dict.update(fs_dict)
        all_features = np.fromiter(all_features_dict.values(),
                                   dtype=np.float32)
    else:
        # DataFrame input: the booster remembers real (possibly string)
        # feature names, so look importances up by name in column order.
        # This is the fix for the ValueError previously raised when names
        # were not of the form 'fN'.
        all_features = [fs.get(f, 0.) for f in b.feature_names]
        all_features = np.array(all_features, dtype=np.float32)
    return all_features / all_features.sum()
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
import random
|
||||||
import xgboost as xgb
|
import xgboost as xgb
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.metrics import mean_squared_error
|
from sklearn.metrics import mean_squared_error
|
||||||
@ -48,6 +49,37 @@ def test_multiclass_classification():
|
|||||||
check_pred(preds4, labels)
|
check_pred(preds4, labels)
|
||||||
|
|
||||||
|
|
||||||
|
def test_feature_importances():
    """feature_importances_ must work for ndarray, numeric-column and
    string-column DataFrame inputs, preserving feature order."""
    digits = load_digits(2)
    y = digits['target']
    X = digits['data']
    xgb_model = xgb.XGBClassifier(seed=0).fit(X, y)

    exp = np.array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.00833333, 0.,
                    0., 0., 0., 0., 0., 0., 0., 0.025, 0.14166667, 0., 0., 0.,
                    0., 0., 0., 0.00833333, 0.25833333, 0., 0., 0., 0.,
                    0.03333334, 0.03333334, 0., 0.32499999, 0., 0., 0., 0.,
                    0.05, 0.06666667, 0., 0., 0., 0., 0., 0., 0., 0.04166667,
                    0., 0., 0., 0., 0., 0., 0., 0.00833333, 0., 0., 0., 0.,
                    0.], dtype=np.float32)

    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)

    # numeric columns
    import pandas as pd
    y = pd.Series(digits['target'])
    X = pd.DataFrame(digits['data'])
    xgb_model = xgb.XGBClassifier(seed=0).fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)

    # string columns, the feature order must be kept
    chars = list('abcdefghijklmnopqrstuvwxyz')
    X.columns = ["".join(random.sample(chars, 5)) for x in range(64)]

    xgb_model = xgb.XGBClassifier(seed=0).fit(X, y)
    np.testing.assert_almost_equal(xgb_model.feature_importances_, exp)
||||||
def test_boston_housing_regression():
|
def test_boston_housing_regression():
|
||||||
boston = load_boston()
|
boston = load_boston()
|
||||||
y = boston['target']
|
y = boston['target']
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user