From f5e96eba72bde51ce7feee2437608daedbfdffc5 Mon Sep 17 00:00:00 2001 From: Maxim Grechkin Date: Thu, 28 Jan 2016 14:16:11 -0800 Subject: [PATCH] Make missing handling consistent with sklearn's portion of the python package --- python-package/xgboost/core.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 8dc2a54b2..62f32e03c 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -184,7 +184,7 @@ class DMatrix(object): _feature_names = None # for previous version's pickle _feature_types = None - def __init__(self, data, label=None, missing=0.0, + def __init__(self, data, label=None, missing=None, weight=None, silent=False, feature_names=None, feature_types=None): """ @@ -199,7 +199,8 @@ class DMatrix(object): label : list or numpy 1-D array, optional Label of the training data. missing : float, optional - Value in the data which needs to be present as a missing value. + Value in the data which needs to be present as a missing value. If + None, defaults to np.nan. weight : list or numpy 1-D array , optional Weight for each instance. silent : boolean, optional @@ -278,6 +279,7 @@ class DMatrix(object): raise ValueError('Input numpy.ndarray must be 2 dimensional') data = np.array(mat.reshape(mat.size), dtype=np.float32) self.handle = ctypes.c_void_p() + missing = missing if missing is not None else np.nan _check_call(_LIB.XGDMatrixCreateFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), mat.shape[0], mat.shape[1], ctypes.c_float(missing), @@ -988,4 +990,3 @@ class Booster(object): msg = 'feature_names mismatch: {0} {1}' raise ValueError(msg.format(self.feature_names, data.feature_names)) -