diff --git a/doc/python/python_intro.rst b/doc/python/python_intro.rst index cc0e461e0..0d26a5253 100644 --- a/doc/python/python_intro.rst +++ b/doc/python/python_intro.rst @@ -162,6 +162,8 @@ Support Matrix +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ | cupy.ndarray | T | T | T | T | T | T | +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ +| torch.Tensor | T | T | T | T | T | T | ++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ | dlpack | CPA | CPA | | CPA | FF | FF | +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+ | datatable.Frame | T | FF | | NPA | FF | | diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index 7c11495f7..729750f1f 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -138,9 +138,9 @@ def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statem from cudf import concat as CUDF_concat # pylint: disable=import-error return CUDF_concat(value, axis=0) - from .data import _is_cupy_array + from .data import _is_cupy_alike - if _is_cupy_array(value[0]): + if _is_cupy_alike(value[0]): import cupy # pylint: disable=import-error # pylint: disable=c-extension-no-member,no-member diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 4d093293d..36be766b1 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -357,10 +357,13 @@ def _numpy2ctypes_type(dtype: Type[np.number]) -> Type[CNumeric]: return _NUMPY_TO_CTYPES_MAPPING[dtype] +def _array_hasobject(data: DataType) -> bool: + return hasattr(data.dtype, "hasobject") and data.dtype.hasobject + + def _cuda_array_interface(data: DataType) -> bytes: - assert ( - data.dtype.hasobject is False - ), "Input data contains `object` dtype. Expecting numeric data." + if _array_hasobject(data): + raise ValueError("Input data contains `object` dtype. Expecting numeric data.") interface = data.__cuda_array_interface__ if "mask" in interface: interface["mask"] = interface["mask"].__cuda_array_interface__ @@ -2102,7 +2105,7 @@ class Booster: _array_interface, _cuda_array_interface, _ensure_np_dtype, - _is_cupy_array, + _is_cupy_alike, ) self._assign_dmatrix_features(dtrain) @@ -2116,7 +2119,7 @@ class Booster: "Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian." f" Got: {type(array)}" ) - if not isinstance(array, np.ndarray) and not _is_cupy_array(array): + if not isinstance(array, np.ndarray) and not _is_cupy_alike(array): raise TypeError(msg) n_samples = dtrain.num_row() @@ -2131,7 +2134,7 @@ class Booster: if isinstance(array, np.ndarray): array, _ = _ensure_np_dtype(array, array.dtype) interface = _array_interface(array) - elif _is_cupy_array(array): + elif _is_cupy_alike(array): interface = _cuda_array_interface(array) else: raise TypeError(msg) @@ -2461,7 +2464,7 @@ class Booster: _arrow_transform, _is_arrow, _is_cudf_df, - _is_cupy_array, + _is_cupy_alike, _is_list, _is_np_array_like, _is_pandas_df, @@ -2543,7 +2546,7 @@ class Booster: ) ) return _prediction_output(shape, dims, preds, False) - if _is_cupy_array(data): + if _is_cupy_alike(data): from .data import _transform_cupy_array data = _transform_cupy_array(data) diff --git a/python-package/xgboost/dask/__init__.py b/python-package/xgboost/dask/__init__.py index 046a2c982..6b4ae5b07 100644 --- a/python-package/xgboost/dask/__init__.py +++ b/python-package/xgboost/dask/__init__.py @@ -75,7 +75,7 @@ from xgboost.core import ( _deprecate_positional_args, _expect, ) -from xgboost.data import _is_cudf_ser, _is_cupy_array +from xgboost.data import _is_cudf_ser, _is_cupy_alike from xgboost.sklearn import ( XGBClassifier, XGBClassifierBase, @@ -1909,7 +1909,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase): self.classes_ = await self.client.compute(y.drop_duplicates()) if _is_cudf_ser(self.classes_): self.classes_ = self.classes_.to_cupy() - if _is_cupy_array(self.classes_): + if _is_cupy_alike(self.classes_): self.classes_ = self.classes_.get() self.classes_ = numpy.array(self.classes_) self.n_classes_ = len(self.classes_) diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 05337e788..49a0f43b7 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -26,6 +26,7 @@ from .core import ( DataIter, DataSplitMode, DMatrix, + _array_hasobject, _check_call, _cuda_array_interface, _ProxyDMatrix, @@ -77,9 +78,8 @@ def is_scipy_csr(data: DataType) -> bool: def _array_interface_dict(data: np.ndarray) -> dict: - assert ( - data.dtype.hasobject is False - ), "Input data contains `object` dtype. Expecting numeric data." + if _array_hasobject(data): + raise ValueError("Input data contains `object` dtype. Expecting numeric data.") interface = data.__array_interface__ if "mask" in interface: interface["mask"] = interface["mask"].__array_interface__ @@ -219,7 +219,7 @@ def _is_np_array_like(data: DataType) -> bool: def _ensure_np_dtype( data: DataType, dtype: Optional[NumpyDType] ) -> Tuple[np.ndarray, Optional[NumpyDType]]: - if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]: + if _array_hasobject(data) or data.dtype in [np.float16, np.bool_]: dtype = np.float32 data = data.astype(dtype, copy=False) if not data.flags.aligned: @@ -998,11 +998,8 @@ def _is_cudf_ser(data: DataType) -> bool: return lazy_isinstance(data, "cudf.core.series", "Series") -def _is_cupy_array(data: DataType) -> bool: - return any( - lazy_isinstance(data, n, "ndarray") - for n in ("cupy.core.core", "cupy", "cupy._core.core") - ) +def _is_cupy_alike(data: DataType) -> bool: + return hasattr(data, "__cuda_array_interface__") def _transform_cupy_array(data: DataType) -> CupyT: @@ -1010,7 +1007,7 @@ def _transform_cupy_array(data: DataType) -> CupyT: if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"): data = cupy.array(data, copy=False) - if data.dtype.hasobject or data.dtype in [cupy.bool_]: + if _array_hasobject(data) or data.dtype in [cupy.bool_]: data = data.astype(cupy.float32, copy=False) return data @@ -1222,7 +1219,7 @@ def dispatch_data_backend( return _from_cudf_df( data, missing, threads, feature_names, feature_types, enable_categorical ) - if _is_cupy_array(data): + if _is_cupy_alike(data): return _from_cupy_array(data, missing, threads, feature_names, feature_types) if _is_cupy_csr(data): raise TypeError("cupyx CSR is not supported yet.") @@ -1354,7 +1351,7 @@ def dispatch_meta_backend( data = _transform_dlpack(data) _meta_from_cupy_array(data, name, handle) return - if _is_cupy_array(data): + if _is_cupy_alike(data): _meta_from_cupy_array(data, name, handle) return if _is_cudf_ser(data): @@ -1419,7 +1416,7 @@ def _proxy_transform( return _transform_cudf_df( data, feature_names, feature_types, enable_categorical ) - if _is_cupy_array(data): + if _is_cupy_alike(data): data = _transform_cupy_array(data) return data, None, feature_names, feature_types if _is_dlpack(data): @@ -1470,7 +1467,7 @@ def dispatch_proxy_set_data( # pylint: disable=W0212 proxy._set_data_from_cuda_columnar(data, cast(List, cat_codes)) return - if _is_cupy_array(data): + if _is_cupy_alike(data): proxy._set_data_from_cuda_interface(data) # pylint: disable=W0212 return if _is_dlpack(data): diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 3383ae0b7..8c3a96784 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -39,7 +39,7 @@ from .core import ( _deprecate_positional_args, _parse_eval_str, ) -from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df +from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_alike, _is_pandas_df from .training import train @@ -1177,7 +1177,7 @@ class XGBModel(XGBModelBase): base_margin=base_margin, validate_features=validate_features, ) - if _is_cupy_array(predts): + if _is_cupy_alike(predts): import cupy # pylint: disable=import-error predts = cupy.asnumpy(predts) # ensure numpy array is used. @@ -1458,7 +1458,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase): classes = cp.unique(y.values) self.n_classes_ = len(classes) expected_classes = cp.array(self.classes_) - elif _is_cupy_array(y): + elif _is_cupy_alike(y): import cupy as cp # pylint: disable=E0401 classes = cp.unique(y)