Check __cuda_array_interface__ instead of cupy class. (#9971)

* Now XGBoost can directly consume CUDA data from torch.
Jiaming Yuan 2024-01-09 19:59:01 +08:00 committed by GitHub
parent 2f57bbde3c
commit 01c4711556
6 changed files with 31 additions and 29 deletions
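
As a quick illustration of what this enables, the sketch below passes torch CUDA tensors straight into XGBoost. It is a minimal example, not part of the commit: it assumes a GPU-enabled build of XGBoost and a CUDA-capable torch install, and the shapes and parameters are made up.

    import torch
    import xgboost as xgb

    # A CUDA tensor exposes __cuda_array_interface__, which is all that
    # XGBoost now checks for, so no detour through cupy is needed.
    X = torch.rand(256, 16, device="cuda")
    y = torch.rand(256, device="cuda")

    Xy = xgb.QuantileDMatrix(X, label=y)
    booster = xgb.train(
        {"device": "cuda", "tree_method": "hist"}, Xy, num_boost_round=10
    )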

View File

@@ -162,6 +162,8 @@ Support Matrix
 +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
 | cupy.ndarray            | T         | T                 | T         | T         | T                  | T           |
 +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
+| torch.Tensor            | T         | T                 | T         | T         | T                  | T           |
++-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
 | dlpack                  | CPA       | CPA               |           | CPA       | FF                 | FF          |
 +-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
 | datatable.Frame         | T         | FF                |           | NPA       | FF                 |             |

View File

@@ -138,9 +138,9 @@ def concat(value: Sequence[_T]) -> _T:  # pylint: disable=too-many-return-statem
         from cudf import concat as CUDF_concat  # pylint: disable=import-error

         return CUDF_concat(value, axis=0)

-    from .data import _is_cupy_array
-    if _is_cupy_array(value[0]):
+    from .data import _is_cupy_alike
+    if _is_cupy_alike(value[0]):
         import cupy  # pylint: disable=import-error

         # pylint: disable=c-extension-no-member,no-member

View File

@@ -357,10 +357,13 @@ def _numpy2ctypes_type(dtype: Type[np.number]) -> Type[CNumeric]:
     return _NUMPY_TO_CTYPES_MAPPING[dtype]


+def _array_hasobject(data: DataType) -> bool:
+    return hasattr(data.dtype, "hasobject") and data.dtype.hasobject
+
+
 def _cuda_array_interface(data: DataType) -> bytes:
-    assert (
-        data.dtype.hasobject is False
-    ), "Input data contains `object` dtype. Expecting numeric data."
+    if _array_hasobject(data):
+        raise ValueError("Input data contains `object` dtype. Expecting numeric data.")
     interface = data.__cuda_array_interface__
     if "mask" in interface:
         interface["mask"] = interface["mask"].__cuda_array_interface__
@@ -2102,7 +2105,7 @@ class Booster:
             _array_interface,
             _cuda_array_interface,
             _ensure_np_dtype,
-            _is_cupy_array,
+            _is_cupy_alike,
         )

         self._assign_dmatrix_features(dtrain)
@@ -2116,7 +2119,7 @@
                 "Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian."
                 f" Got: {type(array)}"
             )
-            if not isinstance(array, np.ndarray) and not _is_cupy_array(array):
+            if not isinstance(array, np.ndarray) and not _is_cupy_alike(array):
                 raise TypeError(msg)

             n_samples = dtrain.num_row()
@@ -2131,7 +2134,7 @@
             if isinstance(array, np.ndarray):
                 array, _ = _ensure_np_dtype(array, array.dtype)
                 interface = _array_interface(array)
-            elif _is_cupy_array(array):
+            elif _is_cupy_alike(array):
                 interface = _cuda_array_interface(array)
             else:
                 raise TypeError(msg)
@@ -2461,7 +2464,7 @@
            _arrow_transform,
            _is_arrow,
            _is_cudf_df,
-           _is_cupy_array,
+           _is_cupy_alike,
            _is_list,
            _is_np_array_like,
            _is_pandas_df,
@@ -2543,7 +2546,7 @@
                 )
             )
             return _prediction_output(shape, dims, preds, False)
-        if _is_cupy_array(data):
+        if _is_cupy_alike(data):
             from .data import _transform_cupy_array

             data = _transform_cupy_array(data)
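
Because of the Booster changes above, gradients and hessians returned from a custom objective are also validated by duck typing rather than by class. The following is a minimal sketch of a GPU custom objective, not code from the commit: the toy data, the objective name, and the hyperparameters are illustrative, and it assumes cupy and a GPU build of XGBoost.

    import cupy as cp
    import xgboost as xgb

    # Toy data living on the GPU.
    X = cp.random.rand(256, 8)
    y = cp.random.rand(256)
    dtrain = xgb.DMatrix(X, label=y)

    def squared_error_obj(predt, dtrain):
        # The returned arrays stay on the GPU; they are accepted because
        # they expose __cuda_array_interface__.
        labels = cp.asarray(dtrain.get_label())
        grad = cp.asarray(predt) - labels
        hess = cp.ones_like(grad)
        return grad, hess

    booster = xgb.train(
        {"device": "cuda", "tree_method": "hist"},
        dtrain,
        num_boost_round=8,
        obj=squared_error_obj,
    )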

View File

@@ -75,7 +75,7 @@ from xgboost.core import (
     _deprecate_positional_args,
     _expect,
 )
-from xgboost.data import _is_cudf_ser, _is_cupy_array
+from xgboost.data import _is_cudf_ser, _is_cupy_alike
 from xgboost.sklearn import (
     XGBClassifier,
     XGBClassifierBase,
@@ -1909,7 +1909,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
         self.classes_ = await self.client.compute(y.drop_duplicates())
         if _is_cudf_ser(self.classes_):
             self.classes_ = self.classes_.to_cupy()
-        if _is_cupy_array(self.classes_):
+        if _is_cupy_alike(self.classes_):
             self.classes_ = self.classes_.get()
         self.classes_ = numpy.array(self.classes_)
         self.n_classes_ = len(self.classes_)

View File

@@ -26,6 +26,7 @@ from .core import (
     DataIter,
     DataSplitMode,
     DMatrix,
+    _array_hasobject,
     _check_call,
     _cuda_array_interface,
     _ProxyDMatrix,
@@ -77,9 +78,8 @@ def is_scipy_csr(data: DataType) -> bool:


 def _array_interface_dict(data: np.ndarray) -> dict:
-    assert (
-        data.dtype.hasobject is False
-    ), "Input data contains `object` dtype. Expecting numeric data."
+    if _array_hasobject(data):
+        raise ValueError("Input data contains `object` dtype. Expecting numeric data.")
     interface = data.__array_interface__
     if "mask" in interface:
         interface["mask"] = interface["mask"].__array_interface__
@@ -219,7 +219,7 @@ def _is_np_array_like(data: DataType) -> bool:
 def _ensure_np_dtype(
     data: DataType, dtype: Optional[NumpyDType]
 ) -> Tuple[np.ndarray, Optional[NumpyDType]]:
-    if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]:
+    if _array_hasobject(data) or data.dtype in [np.float16, np.bool_]:
         dtype = np.float32
         data = data.astype(dtype, copy=False)
     if not data.flags.aligned:
@@ -998,11 +998,8 @@ def _is_cudf_ser(data: DataType) -> bool:
     return lazy_isinstance(data, "cudf.core.series", "Series")


-def _is_cupy_array(data: DataType) -> bool:
-    return any(
-        lazy_isinstance(data, n, "ndarray")
-        for n in ("cupy.core.core", "cupy", "cupy._core.core")
-    )
+def _is_cupy_alike(data: DataType) -> bool:
+    return hasattr(data, "__cuda_array_interface__")


 def _transform_cupy_array(data: DataType) -> CupyT:
@@ -1010,7 +1007,7 @@ def _transform_cupy_array(data: DataType) -> CupyT:
     if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"):
         data = cupy.array(data, copy=False)
-    if data.dtype.hasobject or data.dtype in [cupy.bool_]:
+    if _array_hasobject(data) or data.dtype in [cupy.bool_]:
         data = data.astype(cupy.float32, copy=False)
     return data
@@ -1222,7 +1219,7 @@ def dispatch_data_backend(
         return _from_cudf_df(
             data, missing, threads, feature_names, feature_types, enable_categorical
         )
-    if _is_cupy_array(data):
+    if _is_cupy_alike(data):
         return _from_cupy_array(data, missing, threads, feature_names, feature_types)
     if _is_cupy_csr(data):
         raise TypeError("cupyx CSR is not supported yet.")
@@ -1354,7 +1351,7 @@ def dispatch_meta_backend(
         data = _transform_dlpack(data)
         _meta_from_cupy_array(data, name, handle)
         return
-    if _is_cupy_array(data):
+    if _is_cupy_alike(data):
         _meta_from_cupy_array(data, name, handle)
         return
     if _is_cudf_ser(data):
@@ -1419,7 +1416,7 @@ def _proxy_transform(
         return _transform_cudf_df(
             data, feature_names, feature_types, enable_categorical
         )
-    if _is_cupy_array(data):
+    if _is_cupy_alike(data):
         data = _transform_cupy_array(data)
         return data, None, feature_names, feature_types
     if _is_dlpack(data):
@@ -1470,7 +1467,7 @@ def dispatch_proxy_set_data(
         # pylint: disable=W0212
         proxy._set_data_from_cuda_columnar(data, cast(List, cat_codes))
         return
-    if _is_cupy_array(data):
+    if _is_cupy_alike(data):
         proxy._set_data_from_cuda_interface(data)  # pylint: disable=W0212
         return
     if _is_dlpack(data):
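
The renamed `_is_cupy_alike` helper reduces the check to the presence of `__cuda_array_interface__`, so anything implementing the CUDA Array Interface protocol passes, not only `cupy.ndarray`. A small stand-alone sketch of the same duck-typing rule (it assumes cupy and a CUDA-enabled torch; `is_cupy_alike` here is a local copy for illustration, not the private helper from `xgboost.data`):

    import cupy as cp
    import torch

    def is_cupy_alike(data) -> bool:
        # Mirrors the new check: anything exposing the CUDA Array Interface counts.
        return hasattr(data, "__cuda_array_interface__")

    print(is_cupy_alike(cp.zeros(3)))                    # True
    print(is_cupy_alike(torch.zeros(3, device="cuda")))  # True
    print(is_cupy_alike(torch.zeros(3)))                 # False: a CPU tensor does not expose the interface

    # The interface itself is a plain dict describing the device buffer.
    print(sorted(cp.zeros(3).__cuda_array_interface__))
    # e.g. ['data', 'descr', 'shape', 'stream', 'strides', 'typestr', 'version']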

View File

@@ -39,7 +39,7 @@ from .core import (
     _deprecate_positional_args,
     _parse_eval_str,
 )
-from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df
+from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_alike, _is_pandas_df
 from .training import train
@@ -1177,7 +1177,7 @@ class XGBModel(XGBModelBase):
             base_margin=base_margin,
             validate_features=validate_features,
         )
-        if _is_cupy_array(predts):
+        if _is_cupy_alike(predts):
             import cupy  # pylint: disable=import-error

             predts = cupy.asnumpy(predts)  # ensure numpy array is used.
@@ -1458,7 +1458,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
             classes = cp.unique(y.values)
             self.n_classes_ = len(classes)
             expected_classes = cp.array(self.classes_)
-        elif _is_cupy_array(y):
+        elif _is_cupy_alike(y):
             import cupy as cp  # pylint: disable=E0401

             classes = cp.unique(y)
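
With the scikit-learn wrapper, the same duck typing applies on both ends: GPU data such as torch CUDA tensors can be fed to `fit`/`predict`, and GPU predictions are handed back as numpy via `cupy.asnumpy`, as shown in the hunk above. A minimal sketch (shapes and hyperparameters are arbitrary; assumes a GPU build of XGBoost and a CUDA-enabled torch):

    import torch
    import xgboost as xgb

    # Synthetic data kept on the GPU end to end.
    X = torch.rand(512, 8, device="cuda")
    y = X.sum(dim=1)

    reg = xgb.XGBRegressor(device="cuda", tree_method="hist", n_estimators=16)
    reg.fit(X, y)

    # Predictions come back as a numpy array even though the input lives
    # on the GPU.
    preds = reg.predict(X)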