Expose DMatrix API for CUDA columnar and array. (#7217)

* Use JSON-encoded configurations.
* Expose these functions in the header file.
This commit is contained in:
Jiaming Yuan 2021-09-09 17:55:25 +08:00 committed by GitHub
parent 68a2c7b8d6
commit 804b2ac60f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 57 additions and 24 deletions

View File

@ -210,6 +210,36 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data,
DMatrixHandle* out, DMatrixHandle* out,
int nthread); int nthread);
/*!
 * \brief Create DMatrix from CUDA columnar format. (cuDF)
 *
 * The configuration is supplied as a single JSON document rather than as
 * individual scalar arguments.
 *
 * \param data Array of JSON encoded __cuda_array_interface__ for each column,
 *             passed as one NUL-terminated string.
 * \param json_config JSON encoded configuration, passed as a NUL-terminated
 *                    string. Required values are:
 *
 *   - missing: floating-point value used as the missing-value marker when the
 *              DMatrix is constructed.
 *   - nthread: JSON integer; thread count handed to DMatrix construction.
 *
 *   Example: {"missing": 0.0, "nthread": 4}
 *
 * \param out created dmatrix; on success a newly allocated handle is stored here.
 *            NOTE(review): presumably released with the regular DMatrix free
 *            function -- confirm.
 * \return 0 when success, -1 when failure happens
 *
 * \note In builds without XGBOOST_USE_CUDA the implementation only calls
 *       common::AssertGPUSupport(); presumably this reports a failure -- confirm.
 */
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
char const* json_config,
DMatrixHandle *out);
/*!
 * \brief Create DMatrix from CUDA array.
 *
 * The configuration is supplied as a single JSON document rather than as
 * individual scalar arguments.
 *
 * \param data JSON encoded __cuda_array_interface__ for array data, passed as
 *             one NUL-terminated string.
 * \param json_config JSON encoded configuration, passed as a NUL-terminated
 *                    string. Required values are:
 *
 *   - missing: floating-point value used as the missing-value marker when the
 *              DMatrix is constructed.
 *   - nthread: JSON integer; thread count handed to DMatrix construction.
 *
 *   Example: {"missing": 0.0, "nthread": 4}
 *
 * \param out created dmatrix; on success a newly allocated handle is stored here.
 *            NOTE(review): presumably released with the regular DMatrix free
 *            function -- confirm.
 * \return 0 when success, -1 when failure happens
 *
 * \note In builds without XGBOOST_USE_CUDA the implementation only calls
 *       common::AssertGPUSupport(); presumably this reports a failure -- confirm.
 */
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
char const* json_config,
DMatrixHandle *out);
/* /*
* ========================== Begin data callback APIs ========================= * ========================== Begin data callback APIs =========================
* *

View File

@ -421,17 +421,17 @@ def _transform_cudf_df(data, feature_names, feature_types, enable_categorical):
def _from_cudf_df( def _from_cudf_df(
data, missing, nthread, feature_names, feature_types, enable_categorical data, missing, nthread, feature_names, feature_types, enable_categorical
): ) -> Tuple[ctypes.c_void_p, Any, Any]:
data, feature_names, feature_types = _transform_cudf_df( data, feature_names, feature_types = _transform_cudf_df(
data, feature_names, feature_types, enable_categorical data, feature_names, feature_types, enable_categorical
) )
_, interfaces_str = _cudf_array_interfaces(data) _, interfaces_str = _cudf_array_interfaces(data)
handle = ctypes.c_void_p() handle = ctypes.c_void_p()
config = bytes(json.dumps({"missing": missing, "nthread": nthread}), "utf-8")
_check_call( _check_call(
_LIB.XGDMatrixCreateFromArrayInterfaceColumns( _LIB.XGDMatrixCreateFromCudaColumnar(
interfaces_str, interfaces_str,
ctypes.c_float(missing), config,
ctypes.c_int(nthread),
ctypes.byref(handle), ctypes.byref(handle),
) )
) )
@ -469,11 +469,11 @@ def _from_cupy_array(data, missing, nthread, feature_names, feature_types):
data = _transform_cupy_array(data) data = _transform_cupy_array(data)
interface_str = _cuda_array_interface(data) interface_str = _cuda_array_interface(data)
handle = ctypes.c_void_p() handle = ctypes.c_void_p()
config = bytes(json.dumps({"missing": missing, "nthread": nthread}), "utf-8")
_check_call( _check_call(
_LIB.XGDMatrixCreateFromArrayInterface( _LIB.XGDMatrixCreateFromCudaArrayInterface(
interface_str, interface_str,
ctypes.c_float(missing), config,
ctypes.c_int(nthread),
ctypes.byref(handle))) ctypes.byref(handle)))
return handle, feature_names, feature_types return handle, feature_names, feature_types

View File

@ -169,18 +169,16 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
} }
#ifndef XGBOOST_USE_CUDA #ifndef XGBOOST_USE_CUDA
XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs, XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
bst_float missing, char const* c_json_config,
int nthread,
DMatrixHandle *out) { DMatrixHandle *out) {
API_BEGIN(); API_BEGIN();
common::AssertGPUSupport(); common::AssertGPUSupport();
API_END(); API_END();
} }
XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs, XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
bst_float missing, char const* c_json_config,
int nthread,
DMatrixHandle *out) { DMatrixHandle *out) {
API_BEGIN(); API_BEGIN();
common::AssertGPUSupport(); common::AssertGPUSupport();

View File

@ -26,23 +26,28 @@ void XGBoostAPIGuard::RestoreGPUAttribute() {
using namespace xgboost; // NOLINT using namespace xgboost; // NOLINT
XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs, XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
bst_float missing, char const* c_json_config,
int nthread,
DMatrixHandle *out) { DMatrixHandle *out) {
API_BEGIN(); API_BEGIN();
std::string json_str{c_json_strs}; std::string json_str{data};
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto nthread = get<Integer const>(config["nthread"]);
data::CudfAdapter adapter(json_str); data::CudfAdapter adapter(json_str);
*out = *out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread)); new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
API_END(); API_END();
} }
XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs, XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
bst_float missing, int nthread, char const* c_json_config,
DMatrixHandle *out) { DMatrixHandle *out) {
API_BEGIN(); API_BEGIN();
std::string json_str{c_json_strs}; std::string json_str{data};
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto nthread = get<Integer const>(config["nthread"]);
data::CupyAdapter adapter(json_str); data::CupyAdapter adapter(json_str);
*out = *out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread)); new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));