Bug when mixing DMatrix objects with and without feature names

This commit is contained in:
sinhrks 2016-04-29 13:51:34 +09:00
parent ff4dda2102
commit 6bab164d80
4 changed files with 25 additions and 14 deletions

View File

@ -513,6 +513,9 @@ class DMatrix(object):
-------
feature_names : list or None
"""
if self._feature_names is None:
return ['f{0}'.format(i) for i in range(self.num_col())]
else:
return self._feature_names
@property
@ -567,7 +570,7 @@ class DMatrix(object):
"""
if feature_types is not None:
if self.feature_names is None:
if self._feature_names is None:
msg = 'Unable to set feature types before setting names'
raise ValueError(msg)

View File

@ -497,17 +497,8 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
"""
b = self.booster()
fs = b.get_fscore()
if b.feature_names is None:
keys = [int(k.replace('f', '')) for k in fs.keys()]
all_features_dict = dict.fromkeys(range(0, self._features_count), 0)
fs_dict = dict(zip(keys, fs.values()))
all_features_dict.update(fs_dict)
all_features = np.fromiter(all_features_dict.values(),
dtype=np.float32)
else:
all_features = [fs.get(f, 0.) for f in b.feature_names]
all_features = np.array(all_features, dtype=np.float32)
return all_features / all_features.sum()

View File

@ -91,7 +91,7 @@ class TestBasic(unittest.TestCase):
# reset
dm.feature_names = None
assert dm.feature_names is None
self.assertEqual(dm.feature_names, ['f0', 'f1', 'f2', 'f3', 'f4'])
assert dm.feature_types is None
def test_feature_names(self):

View File

@ -99,3 +99,20 @@ class TestModels(unittest.TestCase):
num_round = 2
xgb.cv(param, dtrain, num_round, nfold=5,
metrics={'error'}, seed=0, show_stdv=False)
def test_feature_names_validation(self):
    """Predicting with a DMatrix whose feature names differ must fail.

    Two matrices are built from the same random data: one without
    explicit feature names and one named ``("a", "b", "c")``. A booster
    trained on either matrix is expected to predict on that matrix and
    to raise ``ValueError`` when handed the other one.
    """
    features = np.random.random((10, 3))
    labels = np.random.randint(2, size=(10,))

    unnamed = xgb.DMatrix(features, labels)
    named = xgb.DMatrix(features, labels, feature_names=("a", "b", "c"))

    # Each pair is (matrix used for training, matrix that must be rejected).
    for matching, mismatching in ((unnamed, named), (named, unnamed)):
        booster = xgb.train([], matching)
        booster.predict(matching)  # success
        with self.assertRaises(ValueError):
            booster.predict(mismatching)
        # Predicting on the training matrix still works after the rejection.
        booster.predict(matching)  # success