Add support for cudf.Series (#4891)
This commit is contained in:
parent
82ee2317e8
commit
2fa8b359e0
@ -132,9 +132,11 @@ except ImportError:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
from cudf import DataFrame as CUDF_DataFrame
|
from cudf import DataFrame as CUDF_DataFrame
|
||||||
|
from cudf import Series as CUDF_Series
|
||||||
CUDF_INSTALLED = True
|
CUDF_INSTALLED = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
CUDF_DataFrame = object
|
CUDF_DataFrame = object
|
||||||
|
CUDF_Series = object
|
||||||
CUDF_INSTALLED = False
|
CUDF_INSTALLED = False
|
||||||
|
|
||||||
# sklearn
|
# sklearn
|
||||||
|
|||||||
@ -19,7 +19,7 @@ import scipy.sparse
|
|||||||
|
|
||||||
from .compat import (STRING_TYPES, PY3, DataFrame, MultiIndex, py_str,
|
from .compat import (STRING_TYPES, PY3, DataFrame, MultiIndex, py_str,
|
||||||
PANDAS_INSTALLED, DataTable,
|
PANDAS_INSTALLED, DataTable,
|
||||||
CUDF_INSTALLED, CUDF_DataFrame,
|
CUDF_INSTALLED, CUDF_DataFrame, CUDF_Series,
|
||||||
os_fspath, os_PathLike)
|
os_fspath, os_PathLike)
|
||||||
from .libpath import find_lib_path
|
from .libpath import find_lib_path
|
||||||
|
|
||||||
@ -243,26 +243,35 @@ def c_array(ctype, values):
|
|||||||
|
|
||||||
def _use_columnar_initializer(data):
    """Tell whether ``data`` should use the columnar format initializer.

    The columnar initializer passes the data in as a json string.  Currently
    cudf is the only valid option.

    Parameters
    ----------
    data : object
        Candidate input container.

    Returns
    -------
    bool
        True only when cudf is installed and ``data`` is a cudf DataFrame or
        a cudf Series; False otherwise.
    """
    # CUDF_INSTALLED has to be tested first: when the cudf import fails, the
    # CUDF_DataFrame / CUDF_Series placeholders are plain ``object``, so the
    # isinstance check on its own would accept every input.
    return CUDF_INSTALLED and isinstance(data, (CUDF_DataFrame, CUDF_Series))
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_interface_from_cudf_series(data):
|
||||||
|
"""This returns the array interface from the cudf series. This function should
|
||||||
|
be upstreamed to cudf."""
|
||||||
|
interface = data.__cuda_array_interface__
|
||||||
|
if data.has_null_mask:
|
||||||
|
interface['mask'] = interface['mask'].__cuda_array_interface__
|
||||||
|
return interface
|
||||||
|
|
||||||
|
|
||||||
def _extract_interface_from_cudf(df, is_info):
|
def _extract_interface_from_cudf(df, is_info):
|
||||||
'''This function should be upstreamed to cudf.'''
|
"""This function should be upstreamed to cudf."""
|
||||||
if not _use_columnar_initializer(df):
|
if not _use_columnar_initializer(df):
|
||||||
raise ValueError('Only cudf is supported for initializing as json ' +
|
raise ValueError('Only cudf is supported for initializing as json ' +
|
||||||
'columnar format. For other libraries please ' +
|
'columnar format. For other libraries please ' +
|
||||||
'refer to specific API.')
|
'refer to specific API.')
|
||||||
|
|
||||||
array_interfaces = []
|
array_interfaces = []
|
||||||
for col in df.columns:
|
if isinstance(df, CUDF_DataFrame):
|
||||||
data = df[col]
|
for col in df.columns:
|
||||||
interface = data.__cuda_array_interface__
|
array_interfaces.append(
|
||||||
if data.has_null_mask:
|
_extract_interface_from_cudf_series(df[col]))
|
||||||
interface['mask'] = interface['mask'].__cuda_array_interface__
|
else:
|
||||||
array_interfaces.append(interface)
|
array_interfaces.append(_extract_interface_from_cudf_series(df))
|
||||||
|
|
||||||
if is_info:
|
if is_info:
|
||||||
array_interfaces = array_interfaces[0]
|
array_interfaces = array_interfaces[0]
|
||||||
|
|||||||
@ -27,8 +27,8 @@ def dmatrix_from_cudf(input_type, missing=np.NAN):
|
|||||||
np_label = np.random.randn(kRows).astype(input_type)
|
np_label = np.random.randn(kRows).astype(input_type)
|
||||||
pa_label = pd.DataFrame(np_label)
|
pa_label = pd.DataFrame(np_label)
|
||||||
|
|
||||||
cd: cudf.DataFrame = cudf.from_pandas(pa)
|
cd = cudf.from_pandas(pa)
|
||||||
cd_label: cudf.DataFrame = cudf.from_pandas(pa_label)
|
cd_label = cudf.from_pandas(pa_label).iloc[:, 0]
|
||||||
|
|
||||||
dtrain = xgb.DMatrix(cd, missing=missing, label=cd_label)
|
dtrain = xgb.DMatrix(cd, missing=missing, label=cd_label)
|
||||||
assert dtrain.num_col() == kCols
|
assert dtrain.num_col() == kCols
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user