diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index dd44d7035..eb71e287b 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -513,7 +513,10 @@ class DMatrix(object): ------- feature_names : list or None """ - return self._feature_names + if self._feature_names is None: + return ['f{0}'.format(i) for i in range(self.num_col())] + else: + return self._feature_names @property def feature_types(self): @@ -567,7 +570,7 @@ class DMatrix(object): """ if feature_types is not None: - if self.feature_names is None: + if self._feature_names is None: msg = 'Unable to set feature types before setting names' raise ValueError(msg) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 50089969b..66ae44fff 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -497,17 +497,8 @@ class XGBClassifier(XGBModel, XGBClassifierBase): """ b = self.booster() fs = b.get_fscore() - if b.feature_names is None: - keys = [int(k.replace('f', '')) for k in fs.keys()] - all_features_dict = dict.fromkeys(range(0, self._features_count), 0) - fs_dict = dict(zip(keys, fs.values())) - all_features_dict.update(fs_dict) - all_features = np.fromiter(all_features_dict.values(), - dtype=np.float32) - else: - all_features = [fs.get(f, 0.) for f in b.feature_names] - all_features = np.array(all_features, dtype=np.float32) - + all_features = [fs.get(f, 0.) for f in b.feature_names] + all_features = np.array(all_features, dtype=np.float32) return all_features / all_features.sum() diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py index 6a342da40..0ac401c3a 100644 --- a/tests/python/test_basic.py +++ b/tests/python/test_basic.py @@ -91,7 +91,7 @@ class TestBasic(unittest.TestCase): # reset dm.feature_names = None - assert dm.feature_names is None + self.assertEqual(dm.feature_names, ['f0', 'f1', 'f2', 'f3', 'f4']) assert dm.feature_types is None def test_feature_names(self): diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py index 8895692da..f1229d78d 100644 --- a/tests/python/test_basic_models.py +++ b/tests/python/test_basic_models.py @@ -99,3 +99,20 @@ class TestModels(unittest.TestCase): num_round = 2 xgb.cv(param, dtrain, num_round, nfold=5, metrics={'error'}, seed=0, show_stdv=False) + + def test_feature_names_validation(self): + X = np.random.random((10, 3)) + y = np.random.randint(2, size=(10,)) + + dm1 = xgb.DMatrix(X, y) + dm2 = xgb.DMatrix(X, y, feature_names=("a", "b", "c")) + + bst = xgb.train([], dm1) + bst.predict(dm1) # success + self.assertRaises(ValueError, bst.predict, dm2) + bst.predict(dm1) # success + + bst = xgb.train([], dm2) + bst.predict(dm2) # success + self.assertRaises(ValueError, bst.predict, dm1) + bst.predict(dm2) # success