Support half type for pandas. (#8481)
This commit is contained in:
@@ -30,6 +30,7 @@ from .core import (
|
||||
c_array,
|
||||
c_str,
|
||||
from_pystr_to_cstr,
|
||||
make_jcargs,
|
||||
)
|
||||
|
||||
DispatchedDataBackendReturnType = Tuple[
|
||||
@@ -184,24 +185,15 @@ def _from_numpy_array(
|
||||
feature_names: Optional[FeatureNames],
|
||||
feature_types: Optional[FeatureTypes],
|
||||
) -> DispatchedDataBackendReturnType:
|
||||
"""Initialize data from a 2-D numpy matrix.
|
||||
|
||||
"""
|
||||
"""Initialize data from a 2-D numpy matrix."""
|
||||
if len(data.shape) != 2:
|
||||
raise ValueError(
|
||||
"Expecting 2 dimensional numpy.ndarray, got: ", data.shape
|
||||
)
|
||||
raise ValueError("Expecting 2 dimensional numpy.ndarray, got: ", data.shape)
|
||||
data, _ = _ensure_np_dtype(data, data.dtype)
|
||||
handle = ctypes.c_void_p()
|
||||
args = {
|
||||
"missing": float(missing),
|
||||
"nthread": int(nthread),
|
||||
}
|
||||
config = bytes(json.dumps(args), "utf-8")
|
||||
_check_call(
|
||||
_LIB.XGDMatrixCreateFromDense(
|
||||
_array_interface(data),
|
||||
config,
|
||||
make_jcargs(missing=float(missing), nthread=int(nthread)),
|
||||
ctypes.byref(handle),
|
||||
)
|
||||
)
|
||||
@@ -1205,6 +1197,7 @@ def _proxy_transform(
|
||||
arr, feature_names, feature_types = _transform_pandas_df(
|
||||
data, enable_categorical, feature_names, feature_types
|
||||
)
|
||||
arr, _ = _ensure_np_dtype(arr, arr.dtype)
|
||||
return arr, None, feature_names, feature_types
|
||||
raise TypeError("Value type is not supported for data iterator:" + str(type(data)))
|
||||
|
||||
|
||||
62
python-package/xgboost/testing/data.py
Normal file
62
python-package/xgboost/testing/data.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""Utilities for data generation."""
|
||||
from typing import Generator, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def np_dtypes(
    n_samples: int, n_features: int
) -> Generator[Tuple[np.ndarray, np.ndarray], None, None]:
    """Generate test inputs covering the supported numpy dtypes.

    Each yielded item is a pair ``(orig, X)`` where ``orig`` holds the
    reference values and ``X`` holds the same values converted to one of the
    dtypes under test. The pairs are produced in four groups, in this order:

    1. numpy arrays for every integer/float dtype,
    2. pandas DataFrames wrapping the same integer/float arrays,
    3. numpy arrays for every boolean dtype,
    4. pandas DataFrames wrapping the same boolean arrays.

    Note that groups 2 and 4 yield ``pandas.DataFrame`` pairs even though the
    return annotation only mentions ``np.ndarray``.

    Parameters
    ----------
    n_samples :
        Number of rows of each generated matrix.
    n_features :
        Number of columns of each generated matrix.

    Yields
    ------
    (orig, X) pairs of shape ``(n_samples, n_features)``.
    """
    # Imported locally so that importing this module does not require pandas.
    import pandas as pd

    # Fixed seed: the generated data is identical on every call.
    rng = np.random.RandomState(1994)

    # Integer and float.
    # Values are drawn from [0, 127) so they fit in the narrowest dtype in the
    # list below (np.byte) and are exactly representable in np.float16.
    orig = rng.randint(low=0, high=127, size=n_samples * n_features).reshape(
        n_samples, n_features
    )
    dtypes = [
        np.int32,
        np.int64,
        np.byte,
        np.short,
        np.intc,
        np.int_,
        np.longlong,
        np.uint32,
        np.uint64,
        np.ubyte,
        np.ushort,
        np.uintc,
        np.uint,
        np.ulonglong,
        np.float16,
        np.float32,
        np.float64,
        np.half,
        np.single,
        np.double,
    ]
    for dtype in dtypes:
        X = np.array(orig, dtype=dtype)
        yield orig, X

    for dtype in dtypes:
        X = np.array(orig, dtype=dtype)
        df_orig = pd.DataFrame(orig)
        df = pd.DataFrame(X)
        yield df_orig, df

    # Boolean
    orig = rng.binomial(1, 0.5, size=n_samples * n_features).reshape(
        n_samples, n_features
    )
    # `np.bool8` is intentionally not listed: it was only an alias of
    # `np.bool_`, deprecated in NumPy 1.24 and removed in NumPy 2.0, so
    # referencing it raises AttributeError on current NumPy releases.
    bool_dtypes = [np.bool_, bool]
    for dtype in bool_dtypes:
        X = np.array(orig, dtype=dtype)
        yield orig, X

    for dtype in bool_dtypes:
        X = np.array(orig, dtype=dtype)
        df_orig = pd.DataFrame(orig)
        df = pd.DataFrame(X)
        yield df_orig, df
|
||||
Reference in New Issue
Block a user