Make missing-value handling consistent with the sklearn portion of the Python package
This commit is contained in:
parent
21d5ec7275
commit
f5e96eba72
@ -184,7 +184,7 @@ class DMatrix(object):
|
|||||||
_feature_names = None # for previous version's pickle
|
_feature_names = None # for previous version's pickle
|
||||||
_feature_types = None
|
_feature_types = None
|
||||||
|
|
||||||
def __init__(self, data, label=None, missing=0.0,
|
def __init__(self, data, label=None, missing=None,
|
||||||
weight=None, silent=False,
|
weight=None, silent=False,
|
||||||
feature_names=None, feature_types=None):
|
feature_names=None, feature_types=None):
|
||||||
"""
|
"""
|
||||||
@ -199,7 +199,8 @@ class DMatrix(object):
|
|||||||
label : list or numpy 1-D array, optional
|
label : list or numpy 1-D array, optional
|
||||||
Label of the training data.
|
Label of the training data.
|
||||||
missing : float, optional
|
missing : float, optional
|
||||||
Value in the data which needs to be present as a missing value.
|
Value in the data which needs to be present as a missing value. If
|
||||||
|
None, defaults to np.nan.
|
||||||
weight : list or numpy 1-D array , optional
|
weight : list or numpy 1-D array , optional
|
||||||
Weight for each instance.
|
Weight for each instance.
|
||||||
silent : boolean, optional
|
silent : boolean, optional
|
||||||
@ -278,6 +279,7 @@ class DMatrix(object):
|
|||||||
raise ValueError('Input numpy.ndarray must be 2 dimensional')
|
raise ValueError('Input numpy.ndarray must be 2 dimensional')
|
||||||
data = np.array(mat.reshape(mat.size), dtype=np.float32)
|
data = np.array(mat.reshape(mat.size), dtype=np.float32)
|
||||||
self.handle = ctypes.c_void_p()
|
self.handle = ctypes.c_void_p()
|
||||||
|
missing = missing if missing is not None else np.nan
|
||||||
_check_call(_LIB.XGDMatrixCreateFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
|
_check_call(_LIB.XGDMatrixCreateFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
|
||||||
mat.shape[0], mat.shape[1],
|
mat.shape[0], mat.shape[1],
|
||||||
ctypes.c_float(missing),
|
ctypes.c_float(missing),
|
||||||
@ -988,4 +990,3 @@ class Booster(object):
|
|||||||
msg = 'feature_names mismatch: {0} {1}'
|
msg = 'feature_names mismatch: {0} {1}'
|
||||||
raise ValueError(msg.format(self.feature_names,
|
raise ValueError(msg.format(self.feature_names,
|
||||||
data.feature_names))
|
data.feature_names))
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user