Bug mixing DMatrix objects with and without feature names
This commit is contained in:
parent
ff4dda2102
commit
6bab164d80
@ -513,6 +513,9 @@ class DMatrix(object):
|
|||||||
-------
|
-------
|
||||||
feature_names : list or None
|
feature_names : list or None
|
||||||
"""
|
"""
|
||||||
|
if self._feature_names is None:
|
||||||
|
return ['f{0}'.format(i) for i in range(self.num_col())]
|
||||||
|
else:
|
||||||
return self._feature_names
|
return self._feature_names
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -567,7 +570,7 @@ class DMatrix(object):
|
|||||||
"""
|
"""
|
||||||
if feature_types is not None:
|
if feature_types is not None:
|
||||||
|
|
||||||
if self.feature_names is None:
|
if self._feature_names is None:
|
||||||
msg = 'Unable to set feature types before setting names'
|
msg = 'Unable to set feature types before setting names'
|
||||||
raise ValueError(msg)
|
raise ValueError(msg)
|
||||||
|
|
||||||
|
|||||||
@ -497,17 +497,8 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
|||||||
"""
|
"""
|
||||||
b = self.booster()
|
b = self.booster()
|
||||||
fs = b.get_fscore()
|
fs = b.get_fscore()
|
||||||
if b.feature_names is None:
|
|
||||||
keys = [int(k.replace('f', '')) for k in fs.keys()]
|
|
||||||
all_features_dict = dict.fromkeys(range(0, self._features_count), 0)
|
|
||||||
fs_dict = dict(zip(keys, fs.values()))
|
|
||||||
all_features_dict.update(fs_dict)
|
|
||||||
all_features = np.fromiter(all_features_dict.values(),
|
|
||||||
dtype=np.float32)
|
|
||||||
else:
|
|
||||||
all_features = [fs.get(f, 0.) for f in b.feature_names]
|
all_features = [fs.get(f, 0.) for f in b.feature_names]
|
||||||
all_features = np.array(all_features, dtype=np.float32)
|
all_features = np.array(all_features, dtype=np.float32)
|
||||||
|
|
||||||
return all_features / all_features.sum()
|
return all_features / all_features.sum()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -91,7 +91,7 @@ class TestBasic(unittest.TestCase):
|
|||||||
|
|
||||||
# reset
|
# reset
|
||||||
dm.feature_names = None
|
dm.feature_names = None
|
||||||
assert dm.feature_names is None
|
self.assertEqual(dm.feature_names, ['f0', 'f1', 'f2', 'f3', 'f4'])
|
||||||
assert dm.feature_types is None
|
assert dm.feature_types is None
|
||||||
|
|
||||||
def test_feature_names(self):
|
def test_feature_names(self):
|
||||||
|
|||||||
@ -99,3 +99,20 @@ class TestModels(unittest.TestCase):
|
|||||||
num_round = 2
|
num_round = 2
|
||||||
xgb.cv(param, dtrain, num_round, nfold=5,
|
xgb.cv(param, dtrain, num_round, nfold=5,
|
||||||
metrics={'error'}, seed=0, show_stdv=False)
|
metrics={'error'}, seed=0, show_stdv=False)
|
||||||
|
|
||||||
|
def test_feature_names_validation(self):
|
||||||
|
X = np.random.random((10, 3))
|
||||||
|
y = np.random.randint(2, size=(10,))
|
||||||
|
|
||||||
|
dm1 = xgb.DMatrix(X, y)
|
||||||
|
dm2 = xgb.DMatrix(X, y, feature_names=("a", "b", "c"))
|
||||||
|
|
||||||
|
bst = xgb.train([], dm1)
|
||||||
|
bst.predict(dm1) # success
|
||||||
|
self.assertRaises(ValueError, bst.predict, dm2)
|
||||||
|
bst.predict(dm1) # success
|
||||||
|
|
||||||
|
bst = xgb.train([], dm2)
|
||||||
|
bst.predict(dm2) # success
|
||||||
|
self.assertRaises(ValueError, bst.predict, dm1)
|
||||||
|
bst.predict(dm2) # success
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user