Enforce that input data is not of `object` dtype. (#6927)

* Check for object data type.
* Allow strided arrays with greater underlying buffer size.
This commit is contained in:
Jiaming Yuan 2021-05-02 00:09:01 +08:00 committed by GitHub
parent a1d23f6613
commit 37ad60fe25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 31 additions and 4 deletions

View File

@ -233,6 +233,9 @@ def _numpy2ctypes_type(dtype):
def _array_interface(data: np.ndarray) -> bytes:
assert (
data.dtype.hasobject is False
), "Input data contains `object` dtype. Expecting numeric data."
interface = data.__array_interface__
if "mask" in interface:
interface["mask"] = interface["mask"].__array_interface__
@ -1908,8 +1911,8 @@ class Booster(object):
)
if isinstance(data, np.ndarray):
from .data import _maybe_np_slice
data = _maybe_np_slice(data, data.dtype)
from .data import _ensure_np_dtype
data, _ = _ensure_np_dtype(data, data.dtype)
_check_call(
_LIB.XGBoosterPredictFromDense(
self.handle,

View File

@ -104,6 +104,13 @@ def _is_numpy_array(data):
return isinstance(data, (np.ndarray, np.matrix))
def _ensure_np_dtype(data, dtype):
    """Coerce arrays that hold Python objects to ``float32``.

    Parameters
    ----------
    data :
        Array exposing ``dtype`` and ``astype`` (e.g. ``np.ndarray``).
    dtype :
        The dtype the caller intends to use for ``data``.

    Returns
    -------
    tuple
        ``(data, dtype)``.  When ``data.dtype.hasobject`` is true, ``data`` is
        converted via ``astype(np.float32, copy=False)`` and the returned dtype
        is ``np.float32``; otherwise both arguments are handed back untouched.
    """
    # Fast path: nothing to convert when no object fields are present.
    if not data.dtype.hasobject:
        return data, dtype

    converted = data.astype(np.float32, copy=False)
    return converted, np.float32
def _maybe_np_slice(data, dtype):
'''Handle numpy slice. This can be removed if we use __array_interface__.
'''
@ -118,6 +125,7 @@ def _maybe_np_slice(data, dtype):
data = np.array(data, copy=False, dtype=dtype)
except AttributeError:
data = np.array(data, copy=False, dtype=dtype)
data, dtype = _ensure_np_dtype(data, dtype)
return data

View File

@ -229,8 +229,11 @@ class ArrayInterfaceHandler {
}
strides[1] = n;
}
auto valid = (rows - 1) * strides[0] + (cols - 1) * strides[1] == (rows * cols) - 1;
CHECK(valid) << "Invalid strides in array.";
auto valid = rows * strides[0] + cols * strides[1] >= (rows * cols);
CHECK(valid) << "Invalid strides in array."
<< " strides: (" << strides[0] << "," << strides[1]
<< "), shape: (" << rows << ", " << cols << ")";
}
static void* ExtractData(std::map<std::string, Json> const &column,

View File

@ -155,6 +155,14 @@ class TestInplacePredict:
predt_from_array = booster.inplace_predict(X[:10, ...], missing=self.missing)
predt_from_dmatrix = booster.predict(test)
X_obj = X.copy().astype(object)
assert X_obj.dtype.hasobject is True
assert X.dtype.hasobject is False
np.testing.assert_allclose(
booster.inplace_predict(X_obj), booster.inplace_predict(X)
)
np.testing.assert_allclose(predt_from_dmatrix, predt_from_array)
predt_from_array = booster.inplace_predict(
@ -192,8 +200,13 @@ class TestInplacePredict:
arr_predt = booster.inplace_predict(X)
dmat_predt = booster.predict(xgb.DMatrix(X))
X = df.values
X = np.asfortranarray(X)
fort_predt = booster.inplace_predict(X)
np.testing.assert_allclose(dmat_predt, arr_predt)
np.testing.assert_allclose(df_predt, arr_predt)
np.testing.assert_allclose(fort_predt, arr_predt)
def test_base_margin(self):
booster = self.booster