Check __cuda_array_interface__ instead of cupy class. (#9971)
* Now XGBoost can directly consume CUDA data from torch.
This commit is contained in:
parent
2f57bbde3c
commit
01c4711556
@ -162,6 +162,8 @@ Support Matrix
|
|||||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||||
| cupy.ndarray | T | T | T | T | T | T |
|
| cupy.ndarray | T | T | T | T | T | T |
|
||||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||||
|
| torch.Tensor | T | T | T | T | T | T |
|
||||||
|
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||||
| dlpack | CPA | CPA | | CPA | FF | FF |
|
| dlpack | CPA | CPA | | CPA | FF | FF |
|
||||||
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
+-------------------------+-----------+-------------------+-----------+-----------+--------------------+-------------+
|
||||||
| datatable.Frame | T | FF | | NPA | FF | |
|
| datatable.Frame | T | FF | | NPA | FF | |
|
||||||
|
|||||||
@ -138,9 +138,9 @@ def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statem
|
|||||||
from cudf import concat as CUDF_concat # pylint: disable=import-error
|
from cudf import concat as CUDF_concat # pylint: disable=import-error
|
||||||
|
|
||||||
return CUDF_concat(value, axis=0)
|
return CUDF_concat(value, axis=0)
|
||||||
from .data import _is_cupy_array
|
from .data import _is_cupy_alike
|
||||||
|
|
||||||
if _is_cupy_array(value[0]):
|
if _is_cupy_alike(value[0]):
|
||||||
import cupy # pylint: disable=import-error
|
import cupy # pylint: disable=import-error
|
||||||
|
|
||||||
# pylint: disable=c-extension-no-member,no-member
|
# pylint: disable=c-extension-no-member,no-member
|
||||||
|
|||||||
@ -357,10 +357,13 @@ def _numpy2ctypes_type(dtype: Type[np.number]) -> Type[CNumeric]:
|
|||||||
return _NUMPY_TO_CTYPES_MAPPING[dtype]
|
return _NUMPY_TO_CTYPES_MAPPING[dtype]
|
||||||
|
|
||||||
|
|
||||||
|
def _array_hasobject(data: DataType) -> bool:
    """Return whether ``data`` reports that its dtype contains Python objects.

    The check is duck-typed: it reads ``data.dtype.hasobject`` when both
    attributes exist and falls back to ``False`` otherwise.

    Parameters
    ----------
    data :
        Any array-like input.  It is not required to expose ``dtype``, and
        the dtype is not required to expose ``hasobject``.

    Returns
    -------
    bool
        ``True`` only if ``data.dtype.hasobject`` exists and is truthy.
    """
    # Resolve both attributes defensively: inputs are accepted based on
    # ``__cuda_array_interface__`` alone, so a ``dtype`` attribute (or a
    # ``hasobject`` flag on it) cannot be taken for granted here.
    dtype = getattr(data, "dtype", None)
    return bool(getattr(dtype, "hasobject", False))
|
||||||
|
|
||||||
|
|
||||||
def _cuda_array_interface(data: DataType) -> bytes:
|
def _cuda_array_interface(data: DataType) -> bytes:
|
||||||
assert (
|
if _array_hasobject(data):
|
||||||
data.dtype.hasobject is False
|
raise ValueError("Input data contains `object` dtype. Expecting numeric data.")
|
||||||
), "Input data contains `object` dtype. Expecting numeric data."
|
|
||||||
interface = data.__cuda_array_interface__
|
interface = data.__cuda_array_interface__
|
||||||
if "mask" in interface:
|
if "mask" in interface:
|
||||||
interface["mask"] = interface["mask"].__cuda_array_interface__
|
interface["mask"] = interface["mask"].__cuda_array_interface__
|
||||||
@ -2102,7 +2105,7 @@ class Booster:
|
|||||||
_array_interface,
|
_array_interface,
|
||||||
_cuda_array_interface,
|
_cuda_array_interface,
|
||||||
_ensure_np_dtype,
|
_ensure_np_dtype,
|
||||||
_is_cupy_array,
|
_is_cupy_alike,
|
||||||
)
|
)
|
||||||
|
|
||||||
self._assign_dmatrix_features(dtrain)
|
self._assign_dmatrix_features(dtrain)
|
||||||
@ -2116,7 +2119,7 @@ class Booster:
|
|||||||
"Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian."
|
"Expecting `np.ndarray` or `cupy.ndarray` for gradient and hessian."
|
||||||
f" Got: {type(array)}"
|
f" Got: {type(array)}"
|
||||||
)
|
)
|
||||||
if not isinstance(array, np.ndarray) and not _is_cupy_array(array):
|
if not isinstance(array, np.ndarray) and not _is_cupy_alike(array):
|
||||||
raise TypeError(msg)
|
raise TypeError(msg)
|
||||||
|
|
||||||
n_samples = dtrain.num_row()
|
n_samples = dtrain.num_row()
|
||||||
@ -2131,7 +2134,7 @@ class Booster:
|
|||||||
if isinstance(array, np.ndarray):
|
if isinstance(array, np.ndarray):
|
||||||
array, _ = _ensure_np_dtype(array, array.dtype)
|
array, _ = _ensure_np_dtype(array, array.dtype)
|
||||||
interface = _array_interface(array)
|
interface = _array_interface(array)
|
||||||
elif _is_cupy_array(array):
|
elif _is_cupy_alike(array):
|
||||||
interface = _cuda_array_interface(array)
|
interface = _cuda_array_interface(array)
|
||||||
else:
|
else:
|
||||||
raise TypeError(msg)
|
raise TypeError(msg)
|
||||||
@ -2461,7 +2464,7 @@ class Booster:
|
|||||||
_arrow_transform,
|
_arrow_transform,
|
||||||
_is_arrow,
|
_is_arrow,
|
||||||
_is_cudf_df,
|
_is_cudf_df,
|
||||||
_is_cupy_array,
|
_is_cupy_alike,
|
||||||
_is_list,
|
_is_list,
|
||||||
_is_np_array_like,
|
_is_np_array_like,
|
||||||
_is_pandas_df,
|
_is_pandas_df,
|
||||||
@ -2543,7 +2546,7 @@ class Booster:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
return _prediction_output(shape, dims, preds, False)
|
return _prediction_output(shape, dims, preds, False)
|
||||||
if _is_cupy_array(data):
|
if _is_cupy_alike(data):
|
||||||
from .data import _transform_cupy_array
|
from .data import _transform_cupy_array
|
||||||
|
|
||||||
data = _transform_cupy_array(data)
|
data = _transform_cupy_array(data)
|
||||||
|
|||||||
@ -75,7 +75,7 @@ from xgboost.core import (
|
|||||||
_deprecate_positional_args,
|
_deprecate_positional_args,
|
||||||
_expect,
|
_expect,
|
||||||
)
|
)
|
||||||
from xgboost.data import _is_cudf_ser, _is_cupy_array
|
from xgboost.data import _is_cudf_ser, _is_cupy_alike
|
||||||
from xgboost.sklearn import (
|
from xgboost.sklearn import (
|
||||||
XGBClassifier,
|
XGBClassifier,
|
||||||
XGBClassifierBase,
|
XGBClassifierBase,
|
||||||
@ -1909,7 +1909,7 @@ class DaskXGBClassifier(DaskScikitLearnBase, XGBClassifierBase):
|
|||||||
self.classes_ = await self.client.compute(y.drop_duplicates())
|
self.classes_ = await self.client.compute(y.drop_duplicates())
|
||||||
if _is_cudf_ser(self.classes_):
|
if _is_cudf_ser(self.classes_):
|
||||||
self.classes_ = self.classes_.to_cupy()
|
self.classes_ = self.classes_.to_cupy()
|
||||||
if _is_cupy_array(self.classes_):
|
if _is_cupy_alike(self.classes_):
|
||||||
self.classes_ = self.classes_.get()
|
self.classes_ = self.classes_.get()
|
||||||
self.classes_ = numpy.array(self.classes_)
|
self.classes_ = numpy.array(self.classes_)
|
||||||
self.n_classes_ = len(self.classes_)
|
self.n_classes_ = len(self.classes_)
|
||||||
|
|||||||
@ -26,6 +26,7 @@ from .core import (
|
|||||||
DataIter,
|
DataIter,
|
||||||
DataSplitMode,
|
DataSplitMode,
|
||||||
DMatrix,
|
DMatrix,
|
||||||
|
_array_hasobject,
|
||||||
_check_call,
|
_check_call,
|
||||||
_cuda_array_interface,
|
_cuda_array_interface,
|
||||||
_ProxyDMatrix,
|
_ProxyDMatrix,
|
||||||
@ -77,9 +78,8 @@ def is_scipy_csr(data: DataType) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
def _array_interface_dict(data: np.ndarray) -> dict:
|
def _array_interface_dict(data: np.ndarray) -> dict:
|
||||||
assert (
|
if _array_hasobject(data):
|
||||||
data.dtype.hasobject is False
|
raise ValueError("Input data contains `object` dtype. Expecting numeric data.")
|
||||||
), "Input data contains `object` dtype. Expecting numeric data."
|
|
||||||
interface = data.__array_interface__
|
interface = data.__array_interface__
|
||||||
if "mask" in interface:
|
if "mask" in interface:
|
||||||
interface["mask"] = interface["mask"].__array_interface__
|
interface["mask"] = interface["mask"].__array_interface__
|
||||||
@ -219,7 +219,7 @@ def _is_np_array_like(data: DataType) -> bool:
|
|||||||
def _ensure_np_dtype(
|
def _ensure_np_dtype(
|
||||||
data: DataType, dtype: Optional[NumpyDType]
|
data: DataType, dtype: Optional[NumpyDType]
|
||||||
) -> Tuple[np.ndarray, Optional[NumpyDType]]:
|
) -> Tuple[np.ndarray, Optional[NumpyDType]]:
|
||||||
if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]:
|
if _array_hasobject(data) or data.dtype in [np.float16, np.bool_]:
|
||||||
dtype = np.float32
|
dtype = np.float32
|
||||||
data = data.astype(dtype, copy=False)
|
data = data.astype(dtype, copy=False)
|
||||||
if not data.flags.aligned:
|
if not data.flags.aligned:
|
||||||
@ -998,11 +998,8 @@ def _is_cudf_ser(data: DataType) -> bool:
|
|||||||
return lazy_isinstance(data, "cudf.core.series", "Series")
|
return lazy_isinstance(data, "cudf.core.series", "Series")
|
||||||
|
|
||||||
|
|
||||||
def _is_cupy_array(data: DataType) -> bool:
|
def _is_cupy_alike(data: DataType) -> bool:
|
||||||
return any(
|
return hasattr(data, "__cuda_array_interface__")
|
||||||
lazy_isinstance(data, n, "ndarray")
|
|
||||||
for n in ("cupy.core.core", "cupy", "cupy._core.core")
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _transform_cupy_array(data: DataType) -> CupyT:
|
def _transform_cupy_array(data: DataType) -> CupyT:
|
||||||
@ -1010,7 +1007,7 @@ def _transform_cupy_array(data: DataType) -> CupyT:
|
|||||||
|
|
||||||
if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"):
|
if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"):
|
||||||
data = cupy.array(data, copy=False)
|
data = cupy.array(data, copy=False)
|
||||||
if data.dtype.hasobject or data.dtype in [cupy.bool_]:
|
if _array_hasobject(data) or data.dtype in [cupy.bool_]:
|
||||||
data = data.astype(cupy.float32, copy=False)
|
data = data.astype(cupy.float32, copy=False)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
@ -1222,7 +1219,7 @@ def dispatch_data_backend(
|
|||||||
return _from_cudf_df(
|
return _from_cudf_df(
|
||||||
data, missing, threads, feature_names, feature_types, enable_categorical
|
data, missing, threads, feature_names, feature_types, enable_categorical
|
||||||
)
|
)
|
||||||
if _is_cupy_array(data):
|
if _is_cupy_alike(data):
|
||||||
return _from_cupy_array(data, missing, threads, feature_names, feature_types)
|
return _from_cupy_array(data, missing, threads, feature_names, feature_types)
|
||||||
if _is_cupy_csr(data):
|
if _is_cupy_csr(data):
|
||||||
raise TypeError("cupyx CSR is not supported yet.")
|
raise TypeError("cupyx CSR is not supported yet.")
|
||||||
@ -1354,7 +1351,7 @@ def dispatch_meta_backend(
|
|||||||
data = _transform_dlpack(data)
|
data = _transform_dlpack(data)
|
||||||
_meta_from_cupy_array(data, name, handle)
|
_meta_from_cupy_array(data, name, handle)
|
||||||
return
|
return
|
||||||
if _is_cupy_array(data):
|
if _is_cupy_alike(data):
|
||||||
_meta_from_cupy_array(data, name, handle)
|
_meta_from_cupy_array(data, name, handle)
|
||||||
return
|
return
|
||||||
if _is_cudf_ser(data):
|
if _is_cudf_ser(data):
|
||||||
@ -1419,7 +1416,7 @@ def _proxy_transform(
|
|||||||
return _transform_cudf_df(
|
return _transform_cudf_df(
|
||||||
data, feature_names, feature_types, enable_categorical
|
data, feature_names, feature_types, enable_categorical
|
||||||
)
|
)
|
||||||
if _is_cupy_array(data):
|
if _is_cupy_alike(data):
|
||||||
data = _transform_cupy_array(data)
|
data = _transform_cupy_array(data)
|
||||||
return data, None, feature_names, feature_types
|
return data, None, feature_names, feature_types
|
||||||
if _is_dlpack(data):
|
if _is_dlpack(data):
|
||||||
@ -1470,7 +1467,7 @@ def dispatch_proxy_set_data(
|
|||||||
# pylint: disable=W0212
|
# pylint: disable=W0212
|
||||||
proxy._set_data_from_cuda_columnar(data, cast(List, cat_codes))
|
proxy._set_data_from_cuda_columnar(data, cast(List, cat_codes))
|
||||||
return
|
return
|
||||||
if _is_cupy_array(data):
|
if _is_cupy_alike(data):
|
||||||
proxy._set_data_from_cuda_interface(data) # pylint: disable=W0212
|
proxy._set_data_from_cuda_interface(data) # pylint: disable=W0212
|
||||||
return
|
return
|
||||||
if _is_dlpack(data):
|
if _is_dlpack(data):
|
||||||
|
|||||||
@ -39,7 +39,7 @@ from .core import (
|
|||||||
_deprecate_positional_args,
|
_deprecate_positional_args,
|
||||||
_parse_eval_str,
|
_parse_eval_str,
|
||||||
)
|
)
|
||||||
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_array, _is_pandas_df
|
from .data import _is_cudf_df, _is_cudf_ser, _is_cupy_alike, _is_pandas_df
|
||||||
from .training import train
|
from .training import train
|
||||||
|
|
||||||
|
|
||||||
@ -1177,7 +1177,7 @@ class XGBModel(XGBModelBase):
|
|||||||
base_margin=base_margin,
|
base_margin=base_margin,
|
||||||
validate_features=validate_features,
|
validate_features=validate_features,
|
||||||
)
|
)
|
||||||
if _is_cupy_array(predts):
|
if _is_cupy_alike(predts):
|
||||||
import cupy # pylint: disable=import-error
|
import cupy # pylint: disable=import-error
|
||||||
|
|
||||||
predts = cupy.asnumpy(predts) # ensure numpy array is used.
|
predts = cupy.asnumpy(predts) # ensure numpy array is used.
|
||||||
@ -1458,7 +1458,7 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
|
|||||||
classes = cp.unique(y.values)
|
classes = cp.unique(y.values)
|
||||||
self.n_classes_ = len(classes)
|
self.n_classes_ = len(classes)
|
||||||
expected_classes = cp.array(self.classes_)
|
expected_classes = cp.array(self.classes_)
|
||||||
elif _is_cupy_array(y):
|
elif _is_cupy_alike(y):
|
||||||
import cupy as cp # pylint: disable=E0401
|
import cupy as cp # pylint: disable=E0401
|
||||||
|
|
||||||
classes = cp.unique(y)
|
classes = cp.unique(y)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user