Enforce that input data is not of object dtype. (#6927)

* Check for object data type.
* Allow strided arrays with a larger underlying buffer.
This commit is contained in:
Jiaming Yuan 2021-05-02 00:09:01 +08:00 committed by GitHub
parent a1d23f6613
commit 37ad60fe25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 31 additions and 4 deletions

View File

@ -233,6 +233,9 @@ def _numpy2ctypes_type(dtype):
def _array_interface(data: np.ndarray) -> bytes: def _array_interface(data: np.ndarray) -> bytes:
assert (
data.dtype.hasobject is False
), "Input data contains `object` dtype. Expecting numeric data."
interface = data.__array_interface__ interface = data.__array_interface__
if "mask" in interface: if "mask" in interface:
interface["mask"] = interface["mask"].__array_interface__ interface["mask"] = interface["mask"].__array_interface__
@ -1908,8 +1911,8 @@ class Booster(object):
) )
if isinstance(data, np.ndarray): if isinstance(data, np.ndarray):
from .data import _maybe_np_slice from .data import _ensure_np_dtype
data = _maybe_np_slice(data, data.dtype) data, _ = _ensure_np_dtype(data, data.dtype)
_check_call( _check_call(
_LIB.XGBoosterPredictFromDense( _LIB.XGBoosterPredictFromDense(
self.handle, self.handle,

View File

@ -104,6 +104,13 @@ def _is_numpy_array(data):
return isinstance(data, (np.ndarray, np.matrix)) return isinstance(data, (np.ndarray, np.matrix))
def _ensure_np_dtype(data, dtype):
if data.dtype.hasobject:
data = data.astype(np.float32, copy=False)
dtype = np.float32
return data, dtype
def _maybe_np_slice(data, dtype): def _maybe_np_slice(data, dtype):
'''Handle numpy slice. This can be removed if we use __array_interface__. '''Handle numpy slice. This can be removed if we use __array_interface__.
''' '''
@ -118,6 +125,7 @@ def _maybe_np_slice(data, dtype):
data = np.array(data, copy=False, dtype=dtype) data = np.array(data, copy=False, dtype=dtype)
except AttributeError: except AttributeError:
data = np.array(data, copy=False, dtype=dtype) data = np.array(data, copy=False, dtype=dtype)
data, dtype = _ensure_np_dtype(data, dtype)
return data return data

View File

@ -229,8 +229,11 @@ class ArrayInterfaceHandler {
} }
strides[1] = n; strides[1] = n;
} }
auto valid = (rows - 1) * strides[0] + (cols - 1) * strides[1] == (rows * cols) - 1;
CHECK(valid) << "Invalid strides in array."; auto valid = rows * strides[0] + cols * strides[1] >= (rows * cols);
CHECK(valid) << "Invalid strides in array."
<< " strides: (" << strides[0] << "," << strides[1]
<< "), shape: (" << rows << ", " << cols << ")";
} }
static void* ExtractData(std::map<std::string, Json> const &column, static void* ExtractData(std::map<std::string, Json> const &column,

View File

@ -155,6 +155,14 @@ class TestInplacePredict:
predt_from_array = booster.inplace_predict(X[:10, ...], missing=self.missing) predt_from_array = booster.inplace_predict(X[:10, ...], missing=self.missing)
predt_from_dmatrix = booster.predict(test) predt_from_dmatrix = booster.predict(test)
X_obj = X.copy().astype(object)
assert X_obj.dtype.hasobject is True
assert X.dtype.hasobject is False
np.testing.assert_allclose(
booster.inplace_predict(X_obj), booster.inplace_predict(X)
)
np.testing.assert_allclose(predt_from_dmatrix, predt_from_array) np.testing.assert_allclose(predt_from_dmatrix, predt_from_array)
predt_from_array = booster.inplace_predict( predt_from_array = booster.inplace_predict(
@ -192,8 +200,13 @@ class TestInplacePredict:
arr_predt = booster.inplace_predict(X) arr_predt = booster.inplace_predict(X)
dmat_predt = booster.predict(xgb.DMatrix(X)) dmat_predt = booster.predict(xgb.DMatrix(X))
X = df.values
X = np.asfortranarray(X)
fort_predt = booster.inplace_predict(X)
np.testing.assert_allclose(dmat_predt, arr_predt) np.testing.assert_allclose(dmat_predt, arr_predt)
np.testing.assert_allclose(df_predt, arr_predt) np.testing.assert_allclose(df_predt, arr_predt)
np.testing.assert_allclose(fort_predt, arr_predt)
def test_base_margin(self): def test_base_margin(self):
booster = self.booster booster = self.booster