Check __cuda_array_interface__ instead of cupy class. (#9971)
* Now XGBoost can directly consume CUDA data from torch.
This commit is contained in:
parent
2f57bbde3c
commit
01c4711556
@ -162,6 +162,8 @@ Support Matrix
|
||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||
| cupy.ndarray | T | T | T | T | T | T |
|
||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||
| torch.Tensor | T | T | T | T | T | T |
|
||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||
| dlpack | CPA | CPA | | CPA | FF | FF |
|
||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||
| datatable.Frame | T | FF | | NPA | FF | |
|
||||
|
||||
@ -138,9 +138,9 @@ def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statem
|
||||
from cudf import concat as CUDF_concat # pylint: disable=import-error
|
||||
|
||||
return CUDF_concat(value, axis=0)
|
||||
from .data import _is_cupy_array
|
||||
from .data import _is_cupy_alike
|
||||
|
||||
if _is_cupy_array(value[0]):
|
||||
if _is_cupy_alike(value[0]):
|
||||
import cupy # pylint: disable=import-error
|
||||
|
||||
# pylint: disable=c-extension-no-member,no-member
|
||||
|
||||
@ -357,10 +357,13 @@ def _numpy2ctypes_type(dtype: Type[np.number]) -> Type[CNumeric]:
|
||||
return _NUMPY_TO_CTYPES_MAPPING[dtype]
|
||||
|
||||
|
||||
def _array_hasobject(data: DataType) -> bool:
|
||||
return hasattr(data.dtype, "hasobject") and data.dtype.hasobject
|
||||
|
||||
|
||||
def _cuda_array_interface(data: DataType) -> bytes:
|
||||
assert (
|
||||
data.dtype.hasobject is False
|
||||
), "Input data contains `object` dtype. Expecting numeric data."
|
||||
if _array_hasobject(data):
|
||||
raise ValueError("Input data contains `object` dtype. Expecting numeric data.")
|
||||
interface = data.__cuda_array_interface__
|
||||
if "mask" in interface:
|
||||
interface["mask"] = interface["mask"].__cuda_array_interface__
|
||||
@ -2102,7 +2105,7 @@ class Booster:
|
||||
_array_interface,
|
||||
_cuda_array_interface,
|
||||
_ensure_np_dtype,
|
||||
_is_cupy_array,
|
||||
_is_cupy_alike,
|
||||
)
|
||||
|
||||
self._assign_dmatrix_features(dtrain)
|
||||
@ -2116,7 +2119,7 @@ class Booster:
|
||||
"Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian."
|
||||
f" Got: {type(array)}"
|
||||
)
|
||||
if not isinstance(array, np.ndarray) and not _is_cupy_array(array):
|
||||
if not isinstance(array, np.ndarray) and not _is_cupy_alike(array):
|
||||
raise TypeError(msg)
|
||||
|
||||
n_samples = dtrain.num_row()
|
||||
@ -2131,7 +2134,7 @@ class Booster:
|
||||
if isinstance(array, np.ndarray):
|
||||
array, _ = _ensure_np_dtype(array, array.dtype)
|
||||
interface = _array_interface(array)
|
||||
elif _is_cupy_array(array):
|
||||
elif _is_cupy_alike(array):
|
||||
interface = _cuda_array_interface(array)
|
||||
else:
|
||||
raise TypeError(msg)
|
||||
@ -2461,7 +2464,7 @@ class Booster:
|
||||
_arrow_transform,
|
||||
_is_arrow,
|
||||
_is_cudf_df,
|
||||
_is_cupy_array,
|
||||
_is_cupy_alike,
|
||||
_is_list,
|
||||
_is_np_array_like,
|
||||
_is_pandas_df,
|
||||
@ -2543,7 +2546,7 @@ class Booster:
|
||||
)
|
||||
)
|
||||
return _prediction_output(shape, dims, preds, False)
|
||||
if _is_cupy_array(data):
|
||||
if _is_cupy_alike(data):
|
||||
from .data import _transform_cupy_array
|
||||
|
||||
data = _transform_cupy_array(data)
|
||||
|
||||
@ -75,7 +75,7 @@ from xgboost.core import (
|
||||
_deprecate_positional_args,
|
||||
_expect,
|
||||
)
|
||||
from xgboost.data import _is_cudf_ser, _is_cupy_array
|
||||
from xgboost.data import _is_cudf_ser, _is_cupy_alike
|
||||
from xgboost.sklearn import (
|
||||
XGBClassifier,
|
||||
XGBClassifierBase,
|
||||
@ -1909,7 +1909,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
||||
self.classes_ = await self.client.compute(y.drop_duplicates())
|
||||
if _is_cudf_ser(self.classes_):
|
||||
self.classes_ = self.classes_.to_cupy()
|
||||
if _is_cupy_array(self.classes_):
|
||||
if _is_cupy_alike(self.classes_):
|
||||
self.classes_ = self.classes_.get()
|
||||
self.classes_ = numpy.array(self.classes_)
|
||||
self.n_classes_ = len(self.classes_)
|
||||
|
||||
@ -26,6 +26,7 @@ from .core import (
|
||||
DataIter,
|
||||
DataSplitMode,
|
||||
DMatrix,
|
||||
_array_hasobject,
|
||||
_check_call,
|
||||
_cuda_array_interface,
|
||||
_ProxyDMatrix,
|
||||
@ -77,9 +78,8 @@ def is_scipy_csr(data: DataType) -> bool:
|
||||
|
||||
|
||||
def _array_interface_dict(data: np.ndarray) -> dict:
|
||||
assert (
|
||||
data.dtype.hasobject is False
|
||||
), "Input data contains `object` dtype. Expecting numeric data."
|
||||
if _array_hasobject(data):
|
||||
raise ValueError("Input data contains `object` dtype. Expecting numeric data.")
|
||||
interface = data.__array_interface__
|
||||
if "mask" in interface:
|
||||
interface["mask"] = interface["mask"].__array_interface__
|
||||
@ -219,7 +219,7 @@ def _is_np_array_like(data: DataType) -> bool:
|
||||
def _ensure_np_dtype(
|
||||
data: DataType, dtype: Optional[NumpyDType]
|
||||
) -> Tuple[np.ndarray, Optional[NumpyDType]]:
|
||||
if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]:
|
||||
if _array_hasobject(data) or data.dtype in [np.float16, np.bool_]:
|
||||
dtype = np.float32
|
||||
data = data.astype(dtype, copy=False)
|
||||
if not data.flags.aligned:
|
||||
@ -998,11 +998,8 @@ def _is_cudf_ser(data: DataType) -> bool:
|
||||
return lazy_isinstance(data, "cudf.core.series", "Series")
|
||||
|
||||
|
||||
def _is_cupy_array(data: DataType) -> bool:
    """Check ``data`` against the ``ndarray`` class of the known cupy modules.

    The module paths are tried in order and the first match wins.
    NOTE(review): presumably the several paths cover different cupy
    releases -- confirm against the cupy versions that are supported.
    """
    for module_path in ("cupy.core.core", "cupy", "cupy._core.core"):
        if lazy_isinstance(data, module_path, "ndarray"):
            return True
    return False
|
||||
def _is_cupy_alike(data: DataType) -> bool:
|
||||
return hasattr(data, "__cuda_array_interface__")
|
||||
|
||||
|
||||
def _transform_cupy_array(data: DataType) -> CupyT:
|
||||
@ -1010,7 +1007,7 @@ def _transform_cupy_array(data: DataType) -> CupyT:
|
||||
|
||||
if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"):
|
||||
data = cupy.array(data, copy=False)
|
||||
if data.dtype.hasobject or data.dtype in [cupy.bool_]:
|
||||
if _array_hasobject(data) or data.dtype in [cupy.bool_]:
|
||||
data = data.astype(cupy.float32, copy=False)
|
||||
return data
|
||||
|
||||
@ -1222,7 +1219,7 @@ def dispatch_data_backend(
|
||||
return _from_cudf_df(
|
||||
data, missing, threads, feature_names, feature_types, enable_categorical
|
||||
)
|
||||
if _is_cupy_array(data):
|
||||
if _is_cupy_alike(data):
|
||||
return _from_cupy_array(data, missing, threads, feature_names, feature_types)
|
||||
if _is_cupy_csr(data):
|
||||
raise TypeError("cupyx CSR is not supported yet.")
|
||||
@ -1354,7 +1351,7 @@ def dispatch_meta_backend(
|
||||
data = _transform_dlpack(data)
|
||||
_meta_from_cupy_array(data, name, handle)
|
||||
return
|
||||
if _is_cupy_array(data):
|
||||
if _is_cupy_alike(data):
|
||||
_meta_from_cupy_array(data, name, handle)
|
||||
return
|
||||
if _is_cudf_ser(data):
|
||||
@ -1419,7 +1416,7 @@ def _proxy_transform(
|
||||
return _transform_cudf_df(
|
||||
data, feature_names, feature_types, enable_categorical
|
||||
)
|
||||
if _is_cupy_array(data):
|
||||
if _is_cupy_alike(data):
|
||||
data = _transform_cupy_array(data)
|
||||
return data, None, feature_names, feature_types
|
||||
if _is_dlpack(data):
|
||||
@ -1470,7 +1467,7 @@ def dispatch_proxy_set_data(
|
||||
# pylint: disable=W0212
|
||||
proxy._set_data_from_cuda_columnar(data, cast(List, cat_codes))
|
||||
return
|
||||
if _is_cupy_array(data):
|
||||
if _is_cupy_alike(data):
|
||||
proxy._set_data_from_cuda_interface(data) # pylint: disable=W0212
|
||||
return
|
||||
if _is_dlpack(data):
|
||||
|
||||
@ -39,7 +39,7 @@ from .core import (
|
||||
_deprecate_positional_args,
|
||||
_parse_eval_str,
|
||||
)
|
||||
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df
|
||||
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_alike, _is_pandas_df
|
||||
from .training import train
|
||||
|
||||
|
||||
@ -1177,7 +1177,7 @@ class XGBModel(XGBModelBase):
|
||||
base_margin=base_margin,
|
||||
validate_features=validate_features,
|
||||
)
|
||||
if _is_cupy_array(predts):
|
||||
if _is_cupy_alike(predts):
|
||||
import cupy # pylint: disable=import-error
|
||||
|
||||
predts = cupy.asnumpy(predts) # ensure numpy array is used.
|
||||
@ -1458,7 +1458,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
||||
classes = cp.unique(y.values)
|
||||
self.n_classes_ = len(classes)
|
||||
expected_classes = cp.array(self.classes_)
|
||||
elif _is_cupy_array(y):
|
||||
elif _is_cupy_alike(y):
|
||||
import cupy as cp # pylint: disable=E0401
|
||||
|
||||
classes = cp.unique(y)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user