Make missing-value handling consistent with the sklearn portion of the Python package
This commit is contained in:
parent
21d5ec7275
commit
f5e96eba72
@ -184,7 +184,7 @@ class DMatrix(object):
|
||||
_feature_names = None # for previous version's pickle
|
||||
_feature_types = None
|
||||
|
||||
def __init__(self, data, label=None, missing=0.0,
|
||||
def __init__(self, data, label=None, missing=None,
|
||||
weight=None, silent=False,
|
||||
feature_names=None, feature_types=None):
|
||||
"""
|
||||
@ -199,7 +199,8 @@ class DMatrix(object):
|
||||
label : list or numpy 1-D array, optional
|
||||
Label of the training data.
|
||||
missing : float, optional
|
||||
Value in the data which needs to be present as a missing value.
|
||||
Value in the data which needs to be present as a missing value. If
|
||||
None, defaults to np.nan.
|
||||
weight : list or numpy 1-D array , optional
|
||||
Weight for each instance.
|
||||
silent : boolean, optional
|
||||
@ -278,6 +279,7 @@ class DMatrix(object):
|
||||
raise ValueError('Input numpy.ndarray must be 2 dimensional')
|
||||
data = np.array(mat.reshape(mat.size), dtype=np.float32)
|
||||
self.handle = ctypes.c_void_p()
|
||||
missing = missing if missing is not None else np.nan
|
||||
_check_call(_LIB.XGDMatrixCreateFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
|
||||
mat.shape[0], mat.shape[1],
|
||||
ctypes.c_float(missing),
|
||||
@ -988,4 +990,3 @@ class Booster(object):
|
||||
msg = 'feature_names mismatch: {0} {1}'
|
||||
raise ValueError(msg.format(self.feature_names,
|
||||
data.feature_names))
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user