Support for all primitive types from array. (#7003)
* Change C API name.
* Test for all primitive types from array.
* Add native support for 128-bit float on CPU.
* Convert boolean and float16 in Python.
* Pin the dask version for now.
This commit is contained in:
parent
816b789bf0
commit
ee4f51a631
@ -142,7 +142,7 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr,
|
|||||||
* \param out created dmatrix
|
* \param out created dmatrix
|
||||||
* \return 0 when success, -1 when failure happens
|
* \return 0 when success, -1 when failure happens
|
||||||
*/
|
*/
|
||||||
XGB_DLL int XGDMatrixCreateFromArray(char const *data,
|
XGB_DLL int XGDMatrixCreateFromDense(char const *data,
|
||||||
char const *json_config,
|
char const *json_config,
|
||||||
DMatrixHandle *out);
|
DMatrixHandle *out);
|
||||||
|
|
||||||
|
|||||||
@ -239,7 +239,18 @@ def _array_interface(data: np.ndarray) -> bytes:
|
|||||||
interface = data.__array_interface__
|
interface = data.__array_interface__
|
||||||
if "mask" in interface:
|
if "mask" in interface:
|
||||||
interface["mask"] = interface["mask"].__array_interface__
|
interface["mask"] = interface["mask"].__array_interface__
|
||||||
interface_str = bytes(json.dumps(interface, indent=2), "utf-8")
|
interface_str = bytes(json.dumps(interface), "utf-8")
|
||||||
|
return interface_str
|
||||||
|
|
||||||
|
|
||||||
|
def _cuda_array_interface(data) -> bytes:
|
||||||
|
assert (
|
||||||
|
data.dtype.hasobject is False
|
||||||
|
), "Input data contains `object` dtype. Expecting numeric data."
|
||||||
|
interface = data.__cuda_array_interface__
|
||||||
|
if "mask" in interface:
|
||||||
|
interface["mask"] = interface["mask"].__cuda_array_interface__
|
||||||
|
interface_str = bytes(json.dumps(interface), "utf-8")
|
||||||
return interface_str
|
return interface_str
|
||||||
|
|
||||||
|
|
||||||
@ -1948,10 +1959,7 @@ class Booster(object):
|
|||||||
from .data import _transform_cupy_array
|
from .data import _transform_cupy_array
|
||||||
|
|
||||||
data = _transform_cupy_array(data)
|
data = _transform_cupy_array(data)
|
||||||
interface = data.__cuda_array_interface__
|
interface_str = _cuda_array_interface(data)
|
||||||
if "mask" in interface:
|
|
||||||
interface["mask"] = interface["mask"].__cuda_array_interface__
|
|
||||||
interface_str = bytes(json.dumps(interface, indent=2), "utf-8")
|
|
||||||
_check_call(
|
_check_call(
|
||||||
_LIB.XGBoosterPredictFromCudaArray(
|
_LIB.XGBoosterPredictFromCudaArray(
|
||||||
self.handle,
|
self.handle,
|
||||||
|
|||||||
@ -9,7 +9,8 @@ from typing import Any
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from .core import c_array, _LIB, _check_call, c_str, _array_interface
|
from .core import c_array, _LIB, _check_call, c_str
|
||||||
|
from .core import _array_interface, _cuda_array_interface
|
||||||
from .core import DataIter, _ProxyDMatrix, DMatrix
|
from .core import DataIter, _ProxyDMatrix, DMatrix
|
||||||
from .compat import lazy_isinstance
|
from .compat import lazy_isinstance
|
||||||
|
|
||||||
@ -105,7 +106,7 @@ def _is_numpy_array(data):
|
|||||||
|
|
||||||
|
|
||||||
def _ensure_np_dtype(data, dtype):
|
def _ensure_np_dtype(data, dtype):
|
||||||
if data.dtype.hasobject:
|
if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]:
|
||||||
data = data.astype(np.float32, copy=False)
|
data = data.astype(np.float32, copy=False)
|
||||||
dtype = np.float32
|
dtype = np.float32
|
||||||
return data, dtype
|
return data, dtype
|
||||||
@ -141,7 +142,7 @@ def _from_numpy_array(data, missing, nthread, feature_names, feature_types):
|
|||||||
}
|
}
|
||||||
config = bytes(json.dumps(args), "utf-8")
|
config = bytes(json.dumps(args), "utf-8")
|
||||||
_check_call(
|
_check_call(
|
||||||
_LIB.XGDMatrixCreateFromArray(
|
_LIB.XGDMatrixCreateFromDense(
|
||||||
_array_interface(data),
|
_array_interface(data),
|
||||||
config,
|
config,
|
||||||
ctypes.byref(handle),
|
ctypes.byref(handle),
|
||||||
@ -416,21 +417,19 @@ def _is_cupy_array(data):
|
|||||||
|
|
||||||
|
|
||||||
def _transform_cupy_array(data):
|
def _transform_cupy_array(data):
|
||||||
|
import cupy # pylint: disable=import-error
|
||||||
if not hasattr(data, '__cuda_array_interface__') and hasattr(
|
if not hasattr(data, '__cuda_array_interface__') and hasattr(
|
||||||
data, '__array__'):
|
data, '__array__'):
|
||||||
import cupy # pylint: disable=import-error
|
|
||||||
data = cupy.array(data, copy=False)
|
data = cupy.array(data, copy=False)
|
||||||
|
if data.dtype.hasobject or data.dtype in [cupy.float16, cupy.bool_]:
|
||||||
|
data = data.astype(cupy.float32, copy=False)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
def _from_cupy_array(data, missing, nthread, feature_names, feature_types):
|
def _from_cupy_array(data, missing, nthread, feature_names, feature_types):
|
||||||
"""Initialize DMatrix from cupy ndarray."""
|
"""Initialize DMatrix from cupy ndarray."""
|
||||||
data = _transform_cupy_array(data)
|
data = _transform_cupy_array(data)
|
||||||
interface = data.__cuda_array_interface__
|
interface_str = _cuda_array_interface(data)
|
||||||
if 'mask' in interface:
|
|
||||||
interface['mask'] = interface['mask'].__cuda_array_interface__
|
|
||||||
interface_str = bytes(json.dumps(interface, indent=2), 'utf-8')
|
|
||||||
|
|
||||||
handle = ctypes.c_void_p()
|
handle = ctypes.c_void_p()
|
||||||
_check_call(
|
_check_call(
|
||||||
_LIB.XGDMatrixCreateFromArrayInterface(
|
_LIB.XGDMatrixCreateFromArrayInterface(
|
||||||
|
|||||||
@ -261,7 +261,7 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr,
|
|||||||
API_END();
|
API_END();
|
||||||
}
|
}
|
||||||
|
|
||||||
XGB_DLL int XGDMatrixCreateFromArray(char const *data,
|
XGB_DLL int XGDMatrixCreateFromDense(char const *data,
|
||||||
char const *c_json_config,
|
char const *c_json_config,
|
||||||
DMatrixHandle *out) {
|
DMatrixHandle *out) {
|
||||||
API_BEGIN();
|
API_BEGIN();
|
||||||
|
|||||||
@ -42,7 +42,8 @@ struct ArrayInterfaceErrors {
|
|||||||
return str.c_str();
|
return str.c_str();
|
||||||
}
|
}
|
||||||
static char const* Version() {
|
static char const* Version() {
|
||||||
return "Only version <= 3 of `__cuda_array_interface__' are supported.";
|
return "Only version <= 3 of "
|
||||||
|
"`__cuda_array_interface__/__array_interface__' are supported.";
|
||||||
}
|
}
|
||||||
static char const* OfType(std::string const& type) {
|
static char const* OfType(std::string const& type) {
|
||||||
static std::string str;
|
static std::string str;
|
||||||
@ -81,7 +82,7 @@ struct ArrayInterfaceErrors {
|
|||||||
return "Other";
|
return "Other";
|
||||||
default:
|
default:
|
||||||
LOG(FATAL) << "Invalid type code: " << c << " in `typestr' of input array."
|
LOG(FATAL) << "Invalid type code: " << c << " in `typestr' of input array."
|
||||||
<< "\nPlease verify the `__cuda_array_interface__' "
|
<< "\nPlease verify the `__cuda_array_interface__/__array_interface__' "
|
||||||
<< "of your input data complies to: "
|
<< "of your input data complies to: "
|
||||||
<< "https://docs.scipy.org/doc/numpy/reference/arrays.interface.html"
|
<< "https://docs.scipy.org/doc/numpy/reference/arrays.interface.html"
|
||||||
<< "\nOr open an issue.";
|
<< "\nOr open an issue.";
|
||||||
@ -90,7 +91,7 @@ struct ArrayInterfaceErrors {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static std::string UnSupportedType(StringView typestr) {
|
static std::string UnSupportedType(StringView typestr) {
|
||||||
return TypeStr(typestr[1]) + " is not supported.";
|
return TypeStr(typestr[1]) + "-" + typestr[2] + " is not supported.";
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -135,8 +136,9 @@ class ArrayInterfaceHandler {
|
|||||||
if (array.find("typestr") == array.cend()) {
|
if (array.find("typestr") == array.cend()) {
|
||||||
LOG(FATAL) << "Missing `typestr' field for array interface";
|
LOG(FATAL) << "Missing `typestr' field for array interface";
|
||||||
}
|
}
|
||||||
|
|
||||||
auto typestr = get<String const>(array.at("typestr"));
|
auto typestr = get<String const>(array.at("typestr"));
|
||||||
CHECK_EQ(typestr.size(), 3) << ArrayInterfaceErrors::TypestrFormat();
|
CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat();
|
||||||
CHECK_NE(typestr.front(), '>') << ArrayInterfaceErrors::BigEndian();
|
CHECK_NE(typestr.front(), '>') << ArrayInterfaceErrors::BigEndian();
|
||||||
|
|
||||||
if (array.find("shape") == array.cend()) {
|
if (array.find("shape") == array.cend()) {
|
||||||
@ -295,7 +297,7 @@ class ArrayInterface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
enum Type : std::int8_t { kF4, kF8, kI1, kI2, kI4, kI8, kU1, kU2, kU4, kU8 };
|
enum Type : std::int8_t { kF4, kF8, kF16, kI1, kI2, kI4, kI8, kU1, kU2, kU4, kU8 };
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ArrayInterface() = default;
|
ArrayInterface() = default;
|
||||||
@ -331,7 +333,12 @@ class ArrayInterface {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void AssignType(StringView typestr) {
|
void AssignType(StringView typestr) {
|
||||||
if (typestr[1] == 'f' && typestr[2] == '4') {
|
if (typestr.size() == 4 && typestr[1] == 'f' && typestr[2] == '1' &&
|
||||||
|
typestr[3] == '6') {
|
||||||
|
type = kF16;
|
||||||
|
CHECK(sizeof(long double) == 16)
|
||||||
|
<< "128-bit floating point is not supported on current platform.";
|
||||||
|
} else if (typestr[1] == 'f' && typestr[2] == '4') {
|
||||||
type = kF4;
|
type = kF4;
|
||||||
} else if (typestr[1] == 'f' && typestr[2] == '8') {
|
} else if (typestr[1] == 'f' && typestr[2] == '8') {
|
||||||
type = kF8;
|
type = kF8;
|
||||||
@ -364,6 +371,16 @@ class ArrayInterface {
|
|||||||
return func(reinterpret_cast<float *>(data));
|
return func(reinterpret_cast<float *>(data));
|
||||||
case kF8:
|
case kF8:
|
||||||
return func(reinterpret_cast<double *>(data));
|
return func(reinterpret_cast<double *>(data));
|
||||||
|
#ifdef __CUDA_ARCH__
|
||||||
|
case kF16: {
|
||||||
|
// CUDA device code doesn't support long double.
|
||||||
|
SPAN_CHECK(false);
|
||||||
|
return func(reinterpret_cast<double *>(data));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
case kF16:
|
||||||
|
return func(reinterpret_cast<long double *>(data));
|
||||||
|
#endif
|
||||||
case kI1:
|
case kI1:
|
||||||
return func(reinterpret_cast<int8_t *>(data));
|
return func(reinterpret_cast<int8_t *>(data));
|
||||||
case kI2:
|
case kI2:
|
||||||
|
|||||||
@ -13,8 +13,8 @@ dependencies:
|
|||||||
- scikit-learn
|
- scikit-learn
|
||||||
- pandas
|
- pandas
|
||||||
- matplotlib
|
- matplotlib
|
||||||
- dask
|
- dask=2021.05.0
|
||||||
- distributed
|
- distributed=2021.05.0
|
||||||
- graphviz
|
- graphviz
|
||||||
- python-graphviz
|
- python-graphviz
|
||||||
- hypothesis
|
- hypothesis
|
||||||
|
|||||||
@ -204,6 +204,7 @@ class TestGPUPredict:
|
|||||||
cpu_predt = reg.predict(X)
|
cpu_predt = reg.predict(X)
|
||||||
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
|
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
@pytest.mark.skipif(**tm.no_cudf())
|
@pytest.mark.skipif(**tm.no_cudf())
|
||||||
def test_inplace_predict_cudf(self):
|
def test_inplace_predict_cudf(self):
|
||||||
import cupy as cp
|
import cupy as cp
|
||||||
@ -332,6 +333,7 @@ class TestGPUPredict:
|
|||||||
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
|
rmse = mean_squared_error(y_true=y, y_pred=pred, squared=False)
|
||||||
np.testing.assert_almost_equal(rmse, eval_history['train']['rmse'][-1], decimal=5)
|
np.testing.assert_almost_equal(rmse, eval_history['train']['rmse'][-1], decimal=5)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
@pytest.mark.parametrize("n_classes", [2, 3])
|
@pytest.mark.parametrize("n_classes", [2, 3])
|
||||||
def test_predict_dart(self, n_classes):
|
def test_predict_dart(self, n_classes):
|
||||||
from sklearn.datasets import make_classification
|
from sklearn.datasets import make_classification
|
||||||
@ -378,3 +380,59 @@ class TestGPUPredict:
|
|||||||
|
|
||||||
copied = cp.array(copied)
|
copied = cp.array(copied)
|
||||||
cp.testing.assert_allclose(inplace, copied, atol=1e-6)
|
cp.testing.assert_allclose(inplace, copied, atol=1e-6)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(**tm.no_cupy())
|
||||||
|
def test_dtypes(self):
|
||||||
|
import cupy as cp
|
||||||
|
rows = 1000
|
||||||
|
cols = 10
|
||||||
|
rng = cp.random.RandomState(1994)
|
||||||
|
orig = rng.randint(low=0, high=127, size=rows * cols).reshape(
|
||||||
|
rows, cols
|
||||||
|
)
|
||||||
|
y = rng.randint(low=0, high=127, size=rows)
|
||||||
|
dtrain = xgb.DMatrix(orig, label=y)
|
||||||
|
booster = xgb.train({"tree_method": "gpu_hist"}, dtrain)
|
||||||
|
|
||||||
|
predt_orig = booster.inplace_predict(orig)
|
||||||
|
# all primitive types in numpy
|
||||||
|
for dtype in [
|
||||||
|
cp.signedinteger,
|
||||||
|
cp.byte,
|
||||||
|
cp.short,
|
||||||
|
cp.intc,
|
||||||
|
cp.int_,
|
||||||
|
cp.longlong,
|
||||||
|
cp.unsignedinteger,
|
||||||
|
cp.ubyte,
|
||||||
|
cp.ushort,
|
||||||
|
cp.uintc,
|
||||||
|
cp.uint,
|
||||||
|
cp.ulonglong,
|
||||||
|
cp.floating,
|
||||||
|
cp.half,
|
||||||
|
cp.single,
|
||||||
|
cp.double,
|
||||||
|
]:
|
||||||
|
X = cp.array(orig, dtype=dtype)
|
||||||
|
predt = booster.inplace_predict(X)
|
||||||
|
cp.testing.assert_allclose(predt, predt_orig)
|
||||||
|
|
||||||
|
# boolean
|
||||||
|
orig = cp.random.binomial(1, 0.5, size=rows * cols).reshape(
|
||||||
|
rows, cols
|
||||||
|
)
|
||||||
|
predt_orig = booster.inplace_predict(orig)
|
||||||
|
for dtype in [cp.bool8, cp.bool_]:
|
||||||
|
X = cp.array(orig, dtype=dtype)
|
||||||
|
predt = booster.inplace_predict(X)
|
||||||
|
cp.testing.assert_allclose(predt, predt_orig)
|
||||||
|
|
||||||
|
# unsupported types
|
||||||
|
for dtype in [
|
||||||
|
cp.complex64,
|
||||||
|
cp.complex128,
|
||||||
|
]:
|
||||||
|
X = cp.array(orig, dtype=dtype)
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
booster.inplace_predict(X)
|
||||||
|
|||||||
@ -237,3 +237,51 @@ class TestInplacePredict:
|
|||||||
dtrain = xgb.DMatrix(self.X, self.y, base_margin=base_margin)
|
dtrain = xgb.DMatrix(self.X, self.y, base_margin=base_margin)
|
||||||
from_dmatrix = booster.predict(dtrain)
|
from_dmatrix = booster.predict(dtrain)
|
||||||
np.testing.assert_allclose(from_dmatrix, from_inplace)
|
np.testing.assert_allclose(from_dmatrix, from_inplace)
|
||||||
|
|
||||||
|
def test_dtypes(self):
|
||||||
|
orig = self.rng.randint(low=0, high=127, size=self.rows * self.cols).reshape(
|
||||||
|
self.rows, self.cols
|
||||||
|
)
|
||||||
|
predt_orig = self.booster.inplace_predict(orig)
|
||||||
|
# all primitive types in numpy
|
||||||
|
for dtype in [
|
||||||
|
np.signedinteger,
|
||||||
|
np.byte,
|
||||||
|
np.short,
|
||||||
|
np.intc,
|
||||||
|
np.int_,
|
||||||
|
np.longlong,
|
||||||
|
np.unsignedinteger,
|
||||||
|
np.ubyte,
|
||||||
|
np.ushort,
|
||||||
|
np.uintc,
|
||||||
|
np.uint,
|
||||||
|
np.ulonglong,
|
||||||
|
np.floating,
|
||||||
|
np.half,
|
||||||
|
np.single,
|
||||||
|
np.double,
|
||||||
|
]:
|
||||||
|
X = np.array(orig, dtype=dtype)
|
||||||
|
predt = self.booster.inplace_predict(X)
|
||||||
|
np.testing.assert_allclose(predt, predt_orig)
|
||||||
|
|
||||||
|
# boolean
|
||||||
|
orig = self.rng.binomial(1, 0.5, size=self.rows * self.cols).reshape(
|
||||||
|
self.rows, self.cols
|
||||||
|
)
|
||||||
|
predt_orig = self.booster.inplace_predict(orig)
|
||||||
|
for dtype in [np.bool8, np.bool_]:
|
||||||
|
X = np.array(orig, dtype=dtype)
|
||||||
|
predt = self.booster.inplace_predict(X)
|
||||||
|
np.testing.assert_allclose(predt, predt_orig)
|
||||||
|
|
||||||
|
# unsupported types
|
||||||
|
for dtype in [
|
||||||
|
np.string_,
|
||||||
|
np.complex64,
|
||||||
|
np.complex128,
|
||||||
|
]:
|
||||||
|
X = np.array(orig, dtype=dtype)
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
self.booster.inplace_predict(X)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user