Bug when mixing DMatrix objects with and without feature names
This commit is contained in:
parent
ff4dda2102
commit
6bab164d80
@ -513,6 +513,9 @@ class DMatrix(object):
|
||||
-------
|
||||
feature_names : list or None
|
||||
"""
|
||||
if self._feature_names is None:
|
||||
return ['f{0}'.format(i) for i in range(self.num_col())]
|
||||
else:
|
||||
return self._feature_names
|
||||
|
||||
@property
|
||||
@ -567,7 +570,7 @@ class DMatrix(object):
|
||||
"""
|
||||
if feature_types is not None:
|
||||
|
||||
if self.feature_names is None:
|
||||
if self._feature_names is None:
|
||||
msg = 'Unable to set feature types before setting names'
|
||||
raise ValueError(msg)
|
||||
|
||||
|
||||
@ -497,17 +497,8 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
"""
|
||||
b = self.booster()
|
||||
fs = b.get_fscore()
|
||||
if b.feature_names is None:
|
||||
keys = [int(k.replace('f', '')) for k in fs.keys()]
|
||||
all_features_dict = dict.fromkeys(range(0, self._features_count), 0)
|
||||
fs_dict = dict(zip(keys, fs.values()))
|
||||
all_features_dict.update(fs_dict)
|
||||
all_features = np.fromiter(all_features_dict.values(),
|
||||
dtype=np.float32)
|
||||
else:
|
||||
all_features = [fs.get(f, 0.) for f in b.feature_names]
|
||||
all_features = np.array(all_features, dtype=np.float32)
|
||||
|
||||
return all_features / all_features.sum()
|
||||
|
||||
|
||||
|
||||
@ -91,7 +91,7 @@ class TestBasic(unittest.TestCase):
|
||||
|
||||
# reset
|
||||
dm.feature_names = None
|
||||
assert dm.feature_names is None
|
||||
self.assertEqual(dm.feature_names, ['f0', 'f1', 'f2', 'f3', 'f4'])
|
||||
assert dm.feature_types is None
|
||||
|
||||
def test_feature_names(self):
|
||||
|
||||
@ -99,3 +99,20 @@ class TestModels(unittest.TestCase):
|
||||
num_round = 2
|
||||
xgb.cv(param, dtrain, num_round, nfold=5,
|
||||
metrics={'error'}, seed=0, show_stdv=False)
|
||||
|
||||
def test_feature_names_validation(self):
    """Predicting on a DMatrix whose feature names differ must fail.

    Builds two DMatrix objects over the same random data, one without
    explicit feature names and one named ("a", "b", "c"). For each of
    them a booster is trained, and the test asserts that ``predict``
    raises ``ValueError`` on the other matrix while still working on the
    matrix used for training, both before and after the failed call.
    """
    features = np.random.random((10, 3))
    labels = np.random.randint(2, size=(10,))

    unnamed = xgb.DMatrix(features, labels)
    named = xgb.DMatrix(features, labels, feature_names=("a", "b", "c"))

    # Run the same scenario in both directions: trained without names and
    # fed a named matrix, then trained with names and fed an unnamed one.
    for train_dm, other_dm in ((unnamed, named), (named, unnamed)):
        booster = xgb.train([], train_dm)
        booster.predict(train_dm)  # success
        with self.assertRaises(ValueError):
            booster.predict(other_dm)
        # The rejected call is followed by another valid prediction.
        booster.predict(train_dm)  # success
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user