a more verbose field mismatch error message

This error message can be hard to understand when there are several fields, as shown in the example below. This improves the error message, letting the user know which fields were unexpected or missing.

    import xgboost as xgb
    import pandas as pd
    train = pd.DataFrame({'a':[1], 'b':[2], 'c':[3], 'd':[4], 'f':[2], 'g':2, 'etc etc etc':[11]})
    dtrain = xgb.DMatrix(train.drop('d', axis=1), train.d)
    test = pd.DataFrame({'a':[1], 'b':[2], 'c':[1], 'd':[4], 'e':[2], 'f':[2], 'g':2, 'etc etc etc':[11]})
    dtest = xgb.DMatrix(test)
    modl = xgb.train({}, dtrain)
    modl.predict(dtest)
    
    
    # ValueError: feature_names mismatch: [u'a', u'b', u'c', u'etc etc etc', u'f', u'g'] [u'a', u'b', u'c', u'd', u'e', u'etc etc etc', u'f', u'g']
This commit is contained in:
Julian Quick 2016-03-17 18:13:30 -06:00
parent c449dc6874
commit 2cd109fb98

View File

@ -1038,6 +1038,10 @@ class Booster(object):
else:
# Booster can't accept data with different feature names
if self.feature_names != data.feature_names:
dat_missing = set(self.feature_names) - set(data.feature_names)
my_missing = set(data.feature_names) - set(self.feature_names)
msg = 'feature_names mismatch: {0} {1}'
if dat_missing: msg +='\nexpected ' + ', '.join(str(s) for s in dat_missing) +' in input data'
if my_missing: msg +='\ntraining data did not have the following fields: ' + ', '.join(str(s) for s in my_missing)
raise ValueError(msg.format(self.feature_names,
data.feature_names))