Add a more verbose feature_names mismatch error message
The feature_names mismatch error message can be hard to understand when there are many fields, as shown in the example below. This change improves the error message by telling the user which fields were unexpected and which were missing.
import xgboost as xgb
import pandas as pd
train = pd.DataFrame({'a':[1], 'b':[2], 'c':[3], 'd':[4], 'f':[2], 'g':2, 'etc etc etc':[11]})
dtrain = xgb.DMatrix(train.drop('d', axis=1), train.d)
test = pd.DataFrame({'a':[1], 'b':[2], 'c':[1], 'd':[4], 'e':[2], 'f':[2], 'g':2, 'etc etc etc':[11]})
dtest = xgb.DMatrix(test)
modl = xgb.train({}, dtrain)
modl.predict(dtest)
# ValueError: feature_names mismatch: [u'a', u'b', u'c', u'etc etc etc', u'f', u'g'] [u'a', u'b', u'c', u'd', u'e', u'etc etc etc', u'f', u'g']
This commit is contained in:
parent
c449dc6874
commit
2cd109fb98
@ -1038,6 +1038,10 @@ class Booster(object):
|
||||
else:
|
||||
# Booster can't accept data with different feature names
|
||||
if self.feature_names != data.feature_names:
|
||||
dat_missing = set(self.feature_names) - set(data.feature_names)
|
||||
my_missing = set(data.feature_names) - set(self.feature_names)
|
||||
msg = 'feature_names mismatch: {0} {1}'
|
||||
if dat_missing: msg +='\nexpected ' + ', '.join(str(s) for s in dat_missing) +' in input data'
|
||||
if my_missing: msg +='\ntraining data did not have the following fields: ' + ', '.join(str(s) for s in my_missing)
|
||||
raise ValueError(msg.format(self.feature_names,
|
||||
data.feature_names))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user