Check __cuda_array_interface__ instead of cupy class. (#9971)

* Now XGBoost can directly consume CUDA data from torch.
This commit is contained in:
Jiaming Yuan 2024-01-09 19:59:01 +08:00 committed by GitHub
parent 2f57bbde3c
commit 01c4711556
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 31 additions and 29 deletions

View File

@ -162,6 +162,8 @@ Support Matrix
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| cupy.ndarray            | T         | T                 | T         | T         | T                  | T           |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| torch.Tensor            | T         | T                 | T         | T         | T                  | T           |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| dlpack                  | CPA       | CPA               |           | CPA       | FF                 | FF          |
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
| datatable.Frame         | T         | FF                |           | NPA       | FF                 |             |

View File

@ -138,9 +138,9 @@ def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statem
from cudf import concat as CUDF_concat # pylint: disable=import-error
return CUDF_concat(value, axis=0)
from .data import _is_cupy_array
from .data import _is_cupy_alike
if _is_cupy_array(value[0]):
if _is_cupy_alike(value[0]):
import cupy # pylint: disable=import-error
# pylint: disable=c-extension-no-member,no-member

View File

@ -357,10 +357,13 @@ def _numpy2ctypes_type(dtype: Type[np.number]) -> Type[CNumeric]:
return _NUMPY_TO_CTYPES_MAPPING[dtype]
def _array_hasobject(data: DataType) -> bool:
return hasattr(data.dtype, "hasobject") and data.dtype.hasobject
def _cuda_array_interface(data: DataType) -> bytes:
assert (
data.dtype.hasobject is False
), "Input data contains `object` dtype. Expecting numeric data."
if _array_hasobject(data):
raise ValueError("Input data contains `object` dtype. Expecting numeric data.")
interface = data.__cuda_array_interface__
if "mask" in interface:
interface["mask"] = interface["mask"].__cuda_array_interface__
@ -2102,7 +2105,7 @@ class Booster:
_array_interface,
_cuda_array_interface,
_ensure_np_dtype,
_is_cupy_array,
_is_cupy_alike,
)
self._assign_dmatrix_features(dtrain)
@ -2116,7 +2119,7 @@ class Booster:
"Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian."
f" Got: {type(array)}"
)
if not isinstance(array, np.ndarray) and not _is_cupy_array(array):
if not isinstance(array, np.ndarray) and not _is_cupy_alike(array):
raise TypeError(msg)
n_samples = dtrain.num_row()
@ -2131,7 +2134,7 @@ class Booster:
if isinstance(array, np.ndarray):
array, _ = _ensure_np_dtype(array, array.dtype)
interface = _array_interface(array)
elif _is_cupy_array(array):
elif _is_cupy_alike(array):
interface = _cuda_array_interface(array)
else:
raise TypeError(msg)
@ -2461,7 +2464,7 @@ class Booster:
_arrow_transform,
_is_arrow,
_is_cudf_df,
_is_cupy_array,
_is_cupy_alike,
_is_list,
_is_np_array_like,
_is_pandas_df,
@ -2543,7 +2546,7 @@ class Booster:
)
)
return _prediction_output(shape, dims, preds, False)
if _is_cupy_array(data):
if _is_cupy_alike(data):
from .data import _transform_cupy_array
data = _transform_cupy_array(data)

View File

@ -75,7 +75,7 @@ from xgboost.core import (
_deprecate_positional_args,
_expect,
)
from xgboost.data import _is_cudf_ser, _is_cupy_array
from xgboost.data import _is_cudf_ser, _is_cupy_alike
from xgboost.sklearn import (
XGBClassifier,
XGBClassifierBase,
@ -1909,7 +1909,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
self.classes_ = await self.client.compute(y.drop_duplicates())
if _is_cudf_ser(self.classes_):
self.classes_ = self.classes_.to_cupy()
if _is_cupy_array(self.classes_):
if _is_cupy_alike(self.classes_):
self.classes_ = self.classes_.get()
self.classes_ = numpy.array(self.classes_)
self.n_classes_ = len(self.classes_)

View File

@ -26,6 +26,7 @@ from .core import (
DataIter,
DataSplitMode,
DMatrix,
_array_hasobject,
_check_call,
_cuda_array_interface,
_ProxyDMatrix,
@ -77,9 +78,8 @@ def is_scipy_csr(data: DataType) -> bool:
def _array_interface_dict(data: np.ndarray) -> dict:
assert (
data.dtype.hasobject is False
), "Input data contains `object` dtype. Expecting numeric data."
if _array_hasobject(data):
raise ValueError("Input data contains `object` dtype. Expecting numeric data.")
interface = data.__array_interface__
if "mask" in interface:
interface["mask"] = interface["mask"].__array_interface__
@ -219,7 +219,7 @@ def _is_np_array_like(data: DataType) -> bool:
def _ensure_np_dtype(
data: DataType, dtype: Optional[NumpyDType]
) -> Tuple[np.ndarray, Optional[NumpyDType]]:
if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]:
if _array_hasobject(data) or data.dtype in [np.float16, np.bool_]:
dtype = np.float32
data = data.astype(dtype, copy=False)
if not data.flags.aligned:
@ -998,11 +998,8 @@ def _is_cudf_ser(data: DataType) -> bool:
return lazy_isinstance(data, "cudf.core.series", "Series")
def _is_cupy_array(data: DataType) -> bool:
    """Report whether ``data`` is an ``ndarray`` from a known cupy module path.

    Each candidate module path is passed to ``lazy_isinstance`` together
    with the class name ``"ndarray"``; the first match returns ``True``.
    """
    candidate_modules = ("cupy.core.core", "cupy", "cupy._core.core")
    for module_name in candidate_modules:
        if lazy_isinstance(data, module_name, "ndarray"):
            return True
    return False
def _is_cupy_alike(data: DataType) -> bool:
return hasattr(data, "__cuda_array_interface__")
def _transform_cupy_array(data: DataType) -> CupyT:
@ -1010,7 +1007,7 @@ def _transform_cupy_array(data: DataType) -> CupyT:
if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"):
data = cupy.array(data, copy=False)
if data.dtype.hasobject or data.dtype in [cupy.bool_]:
if _array_hasobject(data) or data.dtype in [cupy.bool_]:
data = data.astype(cupy.float32, copy=False)
return data
@ -1222,7 +1219,7 @@ def dispatch_data_backend(
return _from_cudf_df(
data, missing, threads, feature_names, feature_types, enable_categorical
)
if _is_cupy_array(data):
if _is_cupy_alike(data):
return _from_cupy_array(data, missing, threads, feature_names, feature_types)
if _is_cupy_csr(data):
raise TypeError("cupyx CSR is not supported yet.")
@ -1354,7 +1351,7 @@ def dispatch_meta_backend(
data = _transform_dlpack(data)
_meta_from_cupy_array(data, name, handle)
return
if _is_cupy_array(data):
if _is_cupy_alike(data):
_meta_from_cupy_array(data, name, handle)
return
if _is_cudf_ser(data):
@ -1419,7 +1416,7 @@ def _proxy_transform(
return _transform_cudf_df(
data, feature_names, feature_types, enable_categorical
)
if _is_cupy_array(data):
if _is_cupy_alike(data):
data = _transform_cupy_array(data)
return data, None, feature_names, feature_types
if _is_dlpack(data):
@ -1470,7 +1467,7 @@ def dispatch_proxy_set_data(
# pylint: disable=W0212
proxy._set_data_from_cuda_columnar(data, cast(List, cat_codes))
return
if _is_cupy_array(data):
if _is_cupy_alike(data):
proxy._set_data_from_cuda_interface(data) # pylint: disable=W0212
return
if _is_dlpack(data):

View File

@ -39,7 +39,7 @@ from .core import (
_deprecate_positional_args,
_parse_eval_str,
)
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_alike, _is_pandas_df
from .training import train
@ -1177,7 +1177,7 @@ class XGBModel(XGBModelBase):
base_margin=base_margin,
validate_features=validate_features,
)
if _is_cupy_array(predts):
if _is_cupy_alike(predts):
import cupy # pylint: disable=import-error
predts = cupy.asnumpy(predts) # ensure numpy array is used.
@ -1458,7 +1458,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
classes = cp.unique(y.values)
self.n_classes_ = len(classes)
expected_classes = cp.array(self.classes_)
elif _is_cupy_array(y):
elif _is_cupy_alike(y):
import cupy as cp # pylint: disable=E0401
classes = cp.unique(y)