Fix bug when calling list(x) on a string x (#3432)

* Fix bug when calling list(x) on a string x

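For example, list('abcdcba') returns ['a', 'b', 'c', 'd', 'c', 'b', 'a']: the string is split into individual characters instead of being kept as a single value.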

* Allow feature_names/feature_types to be of any type

If feature_names/feature_types is an iterable other than a string (e.g. a tuple or a list), convert it to a list; otherwise (a string or a non-iterable value), wrap it in a single-element list.

* Delete excess whitespace

* Fix whitespace to pass lint
This commit is contained in:
wenduowang 2018-07-30 08:36:34 -06:00 committed by Philip Hyunsu Cho
parent dd07c25d12
commit 3b62e75f2e

View File

@ -766,8 +766,14 @@ class DMatrix(object):
""" """
if feature_names is not None: if feature_names is not None:
# validate feature name # validate feature name
if not isinstance(feature_names, list): try:
feature_names = list(feature_names) if not isinstance(feature_names, str):
feature_names = [n for n in iter(feature_names)]
else:
feature_names = [feature_names]
except TypeError:
feature_names = [feature_names]
if len(feature_names) != len(set(feature_names)): if len(feature_names) != len(set(feature_names)):
raise ValueError('feature_names must be unique') raise ValueError('feature_names must be unique')
if len(feature_names) != self.num_col(): if len(feature_names) != self.num_col():
@ -796,7 +802,6 @@ class DMatrix(object):
Labels for features. None will reset existing feature names Labels for features. None will reset existing feature names
""" """
if feature_types is not None: if feature_types is not None:
if self._feature_names is None: if self._feature_names is None:
msg = 'Unable to set feature types before setting names' msg = 'Unable to set feature types before setting names'
raise ValueError(msg) raise ValueError(msg)
@ -805,8 +810,14 @@ class DMatrix(object):
# single string will be applied to all columns # single string will be applied to all columns
feature_types = [feature_types] * self.num_col() feature_types = [feature_types] * self.num_col()
if not isinstance(feature_types, list): try:
feature_types = list(feature_types) if not isinstance(feature_types, str):
feature_types = [n for n in iter(feature_types)]
else:
feature_types = [feature_types]
except TypeError:
feature_types = [feature_types]
if len(feature_types) != self.num_col(): if len(feature_types) != self.num_col():
msg = 'feature_types must have the same length as data' msg = 'feature_types must have the same length as data'
raise ValueError(msg) raise ValueError(msg)