Make DMatrix's missing-value handling consistent with the sklearn portion of the Python package

2016-01-28 14:16:11 -08:00
parent 21d5ec7275
commit f5e96eba72
1 changed files with 4 additions and 3 deletions
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -184,7 +184,7 @@ class DMatrix(object):
    _feature_names = None  # for previous version's pickle
    _feature_types = None

-    def __init__(self, data, label=None, missing=0.0,
+    def __init__(self, data, label=None, missing=None,
                 weight=None, silent=False,
                 feature_names=None, feature_types=None):
        """
@@ -199,7 +199,8 @@ class DMatrix(object):
        label : list or numpy 1-D array, optional
            Label of the training data.
        missing : float, optional
-            Value in the data which needs to be present as a missing value.
+            Value in the data that should be treated as a missing value. If
+            None, defaults to np.nan.
        weight : list or numpy 1-D array , optional
            Weight for each instance.
        silent : boolean, optional
@@ -278,6 +279,7 @@ class DMatrix(object):
            raise ValueError('Input numpy.ndarray must be 2 dimensional')
        data = np.array(mat.reshape(mat.size), dtype=np.float32)
        self.handle = ctypes.c_void_p()
+        missing = missing if missing is not None else np.nan
        _check_call(_LIB.XGDMatrixCreateFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
                                                mat.shape[0], mat.shape[1],
                                                ctypes.c_float(missing),
@@ -988,4 +990,3 @@ class Booster(object):
                msg = 'feature_names mismatch: {0} {1}'
                raise ValueError(msg.format(self.feature_names,
                                            data.feature_names))
-