Expose DMatrix API for CUDA columnar and array. (#7217)

* Use JSON encoded configurations.
* Expose them in the header file.
This commit is contained in:
Jiaming Yuan 2021-09-09 17:55:25 +08:00 committed by GitHub
parent 68a2c7b8d6
commit 804b2ac60f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 57 additions and 24 deletions

View File

@ -210,6 +210,36 @@ XGB_DLL int XGDMatrixCreateFromDT(void** data,
DMatrixHandle* out,
int nthread);
/*!
* \brief Create DMatrix from CUDA columnar format. (cuDF)
*
* When the library is compiled without XGBOOST_USE_CUDA, the implementation
* only calls common::AssertGPUSupport() and never writes to \p out
* (presumably reporting the missing GPU support as a failure -- confirm).
*
* \param data JSON encoded __cuda_array_interface__ for each column, passed
*             as a single C string. The implementation copies it into a
*             std::string and hands it to data::CudfAdapter.
* \param json_config JSON encoded configuration, passed as a C string.
*                    Required values are:
*
* - missing: read through GetMissing() and forwarded to DMatrix::Create.
* - nthread: read as a JSON integer and forwarded to DMatrix::Create.
*
* \param out Receives the handle of the created DMatrix. The implementation
*            stores a newly allocated std::shared_ptr<DMatrix> here.
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
char const* json_config,
DMatrixHandle *out);
/*!
* \brief Create DMatrix from CUDA array.
*
* When the library is compiled without XGBOOST_USE_CUDA, the implementation
* calls common::AssertGPUSupport() instead of building a DMatrix
* (presumably reporting the missing GPU support as a failure -- confirm).
*
* \param data JSON encoded __cuda_array_interface__ for array data, passed
*             as a C string. The implementation copies it into a
*             std::string and hands it to data::CupyAdapter.
* \param json_config JSON encoded configuration, passed as a C string.
*                    Required values are:
*
* - missing: read through GetMissing() and forwarded to DMatrix::Create.
* - nthread: read as a JSON integer and forwarded to DMatrix::Create.
*
* \param out Receives the handle of the created DMatrix. The implementation
*            stores a newly allocated std::shared_ptr<DMatrix> here.
* \return 0 when success, -1 when failure happens
*/
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
char const* json_config,
DMatrixHandle *out);
/*
* ========================== Begin data callback APIs =========================
*

View File

@ -421,17 +421,17 @@ def _transform_cudf_df(data, feature_names, feature_types, enable_categorical):
def _from_cudf_df(
data, missing, nthread, feature_names, feature_types, enable_categorical
):
) -> Tuple[ctypes.c_void_p, Any, Any]:
data, feature_names, feature_types = _transform_cudf_df(
data, feature_names, feature_types, enable_categorical
)
_, interfaces_str = _cudf_array_interfaces(data)
handle = ctypes.c_void_p()
config = bytes(json.dumps({"missing": missing, "nthread": nthread}), "utf-8")
_check_call(
_LIB.XGDMatrixCreateFromArrayInterfaceColumns(
_LIB.XGDMatrixCreateFromCudaColumnar(
interfaces_str,
ctypes.c_float(missing),
ctypes.c_int(nthread),
config,
ctypes.byref(handle),
)
)
@ -469,11 +469,11 @@ def _from_cupy_array(data, missing, nthread, feature_names, feature_types):
data = _transform_cupy_array(data)
interface_str = _cuda_array_interface(data)
handle = ctypes.c_void_p()
config = bytes(json.dumps({"missing": missing, "nthread": nthread}), "utf-8")
_check_call(
_LIB.XGDMatrixCreateFromArrayInterface(
_LIB.XGDMatrixCreateFromCudaArrayInterface(
interface_str,
ctypes.c_float(missing),
ctypes.c_int(nthread),
config,
ctypes.byref(handle)))
return handle, feature_names, feature_types

View File

@ -169,18 +169,16 @@ XGB_DLL int XGDMatrixCreateFromDataIter(
}
#ifndef XGBOOST_USE_CUDA
XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs,
bst_float missing,
int nthread,
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
char const* c_json_config,
DMatrixHandle *out) {
API_BEGIN();
common::AssertGPUSupport();
API_END();
}
XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
bst_float missing,
int nthread,
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
char const* c_json_config,
DMatrixHandle *out) {
API_BEGIN();
common::AssertGPUSupport();

View File

@ -26,23 +26,28 @@ void XGBoostAPIGuard::RestoreGPUAttribute() {
using namespace xgboost; // NOLINT
XGB_DLL int XGDMatrixCreateFromArrayInterfaceColumns(char const* c_json_strs,
bst_float missing,
int nthread,
XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data,
char const* c_json_config,
DMatrixHandle *out) {
API_BEGIN();
std::string json_str{c_json_strs};
std::string json_str{data};
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto nthread = get<Integer const>(config["nthread"]);
data::CudfAdapter adapter(json_str);
*out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));
API_END();
}
XGB_DLL int XGDMatrixCreateFromArrayInterface(char const* c_json_strs,
bst_float missing, int nthread,
XGB_DLL int XGDMatrixCreateFromCudaArrayInterface(char const *data,
char const* c_json_config,
DMatrixHandle *out) {
API_BEGIN();
std::string json_str{c_json_strs};
std::string json_str{data};
auto config = Json::Load(StringView{c_json_config});
float missing = GetMissing(config);
auto nthread = get<Integer const>(config["nthread"]);
data::CupyAdapter adapter(json_str);
*out =
new std::shared_ptr<DMatrix>(DMatrix::Create(&adapter, missing, nthread));