From 2cd109fb98a08fd0b3e8757a76c3d3c338d8befd Mon Sep 17 00:00:00 2001
From: Julian Quick
Date: Thu, 17 Mar 2016 18:13:30 -0600
Subject: [PATCH] a more verbose field mismatch error message

This error message can be hard to understand when there are several
fields, as shown in the example below. This improves the error message,
letting the user know which fields were unexpected or missing.

    import xgboost as xgb
    import pandas as pd
    train = pd.DataFrame({'a':[1], 'b':[2], 'c':[3], 'd':[4], 'f':[2], 'g':2, 'etc etc etc':[11]})
    dtrain = xgb.DMatrix(train.drop('d', axis=1), train.d)
    test = pd.DataFrame({'a':[1], 'b':[2], 'c':[1], 'd':[4], 'e':[2], 'f':[2], 'g':2, 'etc etc etc':[11]})
    dtest = xgb.DMatrix(test)
    modl = xgb.train({}, dtrain)
    modl.predict(dtest)
    # ValueError: feature_names mismatch: [u'a', u'b', u'c', u'etc etc etc', u'f', u'g'] [u'a', u'b', u'c', u'd', u'e', u'etc etc etc', u'f', u'g']
---
 python-package/xgboost/core.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py
index 0b6949cf1..971d0a95b 100644
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -1038,6 +1038,10 @@ class Booster(object):
         else:
             # Booster can't accept data with different feature names
             if self.feature_names != data.feature_names:
+                dat_missing = set(self.feature_names) - set(data.feature_names)
+                my_missing = set(data.feature_names) - set(self.feature_names)
                 msg = 'feature_names mismatch: {0} {1}'
+                if dat_missing: msg +='\nexpected ' + ', '.join(str(s) for s in dat_missing) +' in input data'
+                if my_missing: msg +='\ntraining data did not have the following fields: ' + ', '.join(str(s) for s in my_missing)
                 raise ValueError(msg.format(self.feature_names,
                                             data.feature_names))