[R] Add missing DMatrix functions (#9929)
* `XGDMatrixGetQuantileCut` * `XGDMatrixNumNonMissing` * `XGDMatrixGetDataAsCSR` --------- Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
This commit is contained in:
@@ -63,6 +63,7 @@ OBJECTS= \
|
||||
$(PKGROOT)/src/gbm/gblinear.o \
|
||||
$(PKGROOT)/src/gbm/gblinear_model.o \
|
||||
$(PKGROOT)/src/data/adapter.o \
|
||||
$(PKGROOT)/src/data/array_interface.o \
|
||||
$(PKGROOT)/src/data/simple_dmatrix.o \
|
||||
$(PKGROOT)/src/data/data.o \
|
||||
$(PKGROOT)/src/data/sparse_page_raw_format.o \
|
||||
|
||||
@@ -63,6 +63,7 @@ OBJECTS= \
|
||||
$(PKGROOT)/src/gbm/gblinear.o \
|
||||
$(PKGROOT)/src/gbm/gblinear_model.o \
|
||||
$(PKGROOT)/src/data/adapter.o \
|
||||
$(PKGROOT)/src/data/array_interface.o \
|
||||
$(PKGROOT)/src/data/simple_dmatrix.o \
|
||||
$(PKGROOT)/src/data/data.o \
|
||||
$(PKGROOT)/src/data/sparse_page_raw_format.o \
|
||||
|
||||
@@ -45,6 +45,9 @@ extern SEXP XGDMatrixCreateFromDF_R(SEXP, SEXP, SEXP);
|
||||
extern SEXP XGDMatrixGetStrFeatureInfo_R(SEXP, SEXP);
|
||||
extern SEXP XGDMatrixNumCol_R(SEXP);
|
||||
extern SEXP XGDMatrixNumRow_R(SEXP);
|
||||
extern SEXP XGDMatrixGetQuantileCut_R(SEXP);
|
||||
extern SEXP XGDMatrixNumNonMissing_R(SEXP);
|
||||
extern SEXP XGDMatrixGetDataAsCSR_R(SEXP);
|
||||
extern SEXP XGDMatrixSaveBinary_R(SEXP, SEXP, SEXP);
|
||||
extern SEXP XGDMatrixSetInfo_R(SEXP, SEXP, SEXP);
|
||||
extern SEXP XGDMatrixSetStrFeatureInfo_R(SEXP, SEXP, SEXP);
|
||||
@@ -84,6 +87,9 @@ static const R_CallMethodDef CallEntries[] = {
|
||||
{"XGDMatrixGetStrFeatureInfo_R", (DL_FUNC) &XGDMatrixGetStrFeatureInfo_R, 2},
|
||||
{"XGDMatrixNumCol_R", (DL_FUNC) &XGDMatrixNumCol_R, 1},
|
||||
{"XGDMatrixNumRow_R", (DL_FUNC) &XGDMatrixNumRow_R, 1},
|
||||
{"XGDMatrixGetQuantileCut_R", (DL_FUNC) &XGDMatrixGetQuantileCut_R, 1},
|
||||
{"XGDMatrixNumNonMissing_R", (DL_FUNC) &XGDMatrixNumNonMissing_R, 1},
|
||||
{"XGDMatrixGetDataAsCSR_R", (DL_FUNC) &XGDMatrixGetDataAsCSR_R, 1},
|
||||
{"XGDMatrixSaveBinary_R", (DL_FUNC) &XGDMatrixSaveBinary_R, 3},
|
||||
{"XGDMatrixSetInfo_R", (DL_FUNC) &XGDMatrixSetInfo_R, 3},
|
||||
{"XGDMatrixSetStrFeatureInfo_R", (DL_FUNC) &XGDMatrixSetStrFeatureInfo_R, 3},
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Copyright 2014-2023 by XGBoost Contributors
|
||||
* Copyright 2014-2024, XGBoost Contributors
|
||||
*/
|
||||
#include <dmlc/common.h>
|
||||
#include <dmlc/omp.h>
|
||||
@@ -9,9 +9,11 @@
|
||||
#include <xgboost/logging.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
@@ -20,14 +22,14 @@
|
||||
#include "../../src/c_api/c_api_error.h"
|
||||
#include "../../src/c_api/c_api_utils.h" // MakeSparseFromPtr
|
||||
#include "../../src/common/threading_utils.h"
|
||||
#include "../../src/data/array_interface.h" // for ArrayInterface
|
||||
|
||||
#include "./xgboost_R.h" // Must follow other includes.
|
||||
|
||||
namespace {
|
||||
|
||||
struct ErrorWithUnwind : public std::exception {};
|
||||
|
||||
void ThrowExceptionFromRError(void *unused, Rboolean jump) {
|
||||
void ThrowExceptionFromRError(void *, Rboolean jump) {
|
||||
if (jump) {
|
||||
throw ErrorWithUnwind();
|
||||
}
|
||||
@@ -49,6 +51,30 @@ SEXP SafeMkChar(const char *c_str, SEXP continuation_token) {
|
||||
continuation_token);
|
||||
}
|
||||
|
||||
// Allocation callback in the shape expected by R_UnwindProtect: `void_ptr`
// addresses a size_t holding the requested length of the REALSXP vector.
SEXP WrappedAllocReal(void *void_ptr) {
  const size_t n_elements = *static_cast<size_t *>(void_ptr);
  return Rf_allocVector(REALSXP, n_elements);
}
|
||||
|
||||
// Allocates a REALSXP vector of `size` elements by running WrappedAllocReal under
// R_UnwindProtect, with ThrowExceptionFromRError registered as the cleanup
// callback and `continuation_token` as the continuation object.
SEXP SafeAllocReal(size_t size, SEXP continuation_token) {
  void *alloc_arg = &size;
  SEXP allocated = R_UnwindProtect(WrappedAllocReal, alloc_arg,
                                   ThrowExceptionFromRError, nullptr,
                                   continuation_token);
  return allocated;
}
|
||||
|
||||
// Allocation callback in the shape expected by R_UnwindProtect: `void_ptr`
// addresses a size_t holding the requested length of the INTSXP vector.
SEXP WrappedAllocInteger(void *void_ptr) {
  const size_t n_elements = *static_cast<size_t *>(void_ptr);
  return Rf_allocVector(INTSXP, n_elements);
}
|
||||
|
||||
// Allocates an INTSXP vector of `size` elements by running WrappedAllocInteger
// under R_UnwindProtect, with ThrowExceptionFromRError registered as the cleanup
// callback and `continuation_token` as the continuation object.
SEXP SafeAllocInteger(size_t size, SEXP continuation_token) {
  void *alloc_arg = &size;
  SEXP allocated = R_UnwindProtect(WrappedAllocInteger, alloc_arg,
                                   ThrowExceptionFromRError, nullptr,
                                   continuation_token);
  return allocated;
}
|
||||
|
||||
[[nodiscard]] std::string MakeArrayInterfaceFromRMat(SEXP R_mat) {
|
||||
SEXP mat_dims = Rf_getAttrib(R_mat, R_DimSymbol);
|
||||
if (Rf_xlength(mat_dims) > 2) {
|
||||
@@ -136,6 +162,37 @@ SEXP SafeMkChar(const char *c_str, SEXP continuation_token) {
|
||||
jconfig["nthread"] = Rf_asInteger(n_threads);
|
||||
return Json::Dump(jconfig);
|
||||
}
|
||||
|
||||
// Allocate a R vector and copy an array interface encoded object to it.
|
||||
[[nodiscard]] SEXP CopyArrayToR(const char *array_str, SEXP ctoken) {
|
||||
xgboost::ArrayInterface<1> array{xgboost::StringView{array_str}};
|
||||
// R supports only int and double.
|
||||
bool is_int =
|
||||
xgboost::DispatchDType(array.type, [](auto t) { return std::is_integral_v<decltype(t)>; });
|
||||
bool is_float = xgboost::DispatchDType(
|
||||
array.type, [](auto v) { return std::is_floating_point_v<decltype(v)>; });
|
||||
CHECK(is_int || is_float) << "Internal error: Invalid DType.";
|
||||
CHECK(array.is_contiguous) << "Internal error: Return by XGBoost should be contiguous";
|
||||
|
||||
// Allocate memory in R
|
||||
SEXP out =
|
||||
Rf_protect(is_int ? SafeAllocInteger(array.n, ctoken) : SafeAllocReal(array.n, ctoken));
|
||||
|
||||
xgboost::DispatchDType(array.type, [&](auto t) {
|
||||
using T = decltype(t);
|
||||
auto in_ptr = static_cast<T const *>(array.data);
|
||||
if (is_int) {
|
||||
auto out_ptr = INTEGER(out);
|
||||
std::copy_n(in_ptr, array.n, out_ptr);
|
||||
} else {
|
||||
auto out_ptr = REAL(out);
|
||||
std::copy_n(in_ptr, array.n, out_ptr);
|
||||
}
|
||||
});
|
||||
|
||||
Rf_unprotect(1);
|
||||
return out;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
struct RRNGStateController {
|
||||
@@ -540,6 +597,73 @@ XGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle) {
|
||||
return ScalarInteger(static_cast<int>(ncol));
|
||||
}
|
||||
|
||||
// R entry point: queries XGDMatrixGetQuantileCut for the DMatrix behind `handle`
// and returns a named list whose elements "indptr" and "data" hold the two
// arrays converted to R vectors by CopyArrayToR.
XGB_DLL SEXP XGDMatrixGetQuantileCut_R(SEXP handle) {
  const char *field_names[] = {"indptr", "data", ""};
  SEXP unwind_token = Rf_protect(R_MakeUnwindCont());
  SEXP result = Rf_protect(Rf_mkNamed(VECSXP, field_names));
  R_API_BEGIN();
  const char *indptr_json;
  const char *values_json;
  CHECK_CALL(
      XGDMatrixGetQuantileCut(R_ExternalPtrAddr(handle), "{}", &indptr_json, &values_json));
  try {
    SET_VECTOR_ELT(result, 0, CopyArrayToR(indptr_json, unwind_token));
    SET_VECTOR_ELT(result, 1, CopyArrayToR(values_json, unwind_token));
  } catch (ErrorWithUnwind &) {
    // An R error was intercepted during allocation; resume R's unwinding.
    R_ContinueUnwind(unwind_token);
  }
  R_API_END();
  Rf_unprotect(2);
  return result;
}
|
||||
|
||||
// R entry point: returns the number of non-missing entries of the DMatrix behind
// `handle` as a length-one REALSXP vector (the count is converted to double).
XGB_DLL SEXP XGDMatrixNumNonMissing_R(SEXP handle) {
  SEXP result = Rf_protect(Rf_allocVector(REALSXP, 1));
  R_API_BEGIN();
  bst_ulong n_non_missing;
  CHECK_CALL(XGDMatrixNumNonMissing(R_ExternalPtrAddr(handle), &n_non_missing));
  double *result_ptr = REAL(result);
  result_ptr[0] = static_cast<double>(n_non_missing);
  R_API_END();
  Rf_unprotect(1);
  return result;
}
|
||||
|
||||
// R entry point: exports the DMatrix behind `handle` in CSR form.
//
// Returns a named list with, in order:
//   "indptr"  - INTSXP of length nrows + 1 (row offsets)
//   "indices" - INTSXP of length nnz (column indices)
//   "data"    - REALSXP of length nnz (values, widened from float)
//   "ncols"   - integer scalar with the number of columns
//
// Raises an R error when the row count, column count or number of stored
// entries does not fit into a C int, since the outputs are R integer vectors.
XGB_DLL SEXP XGDMatrixGetDataAsCSR_R(SEXP handle) {
  const char *out_names[] = {"indptr", "indices", "data", "ncols", ""};
  SEXP out = Rf_protect(Rf_mkNamed(VECSXP, out_names));
  R_API_BEGIN();

  bst_ulong nrows{0};
  bst_ulong ncols{0};
  bst_ulong nnz{0};
  CHECK_CALL(XGDMatrixNumRow(R_ExternalPtrAddr(handle), &nrows));
  CHECK_CALL(XGDMatrixNumCol(R_ExternalPtrAddr(handle), &ncols));
  CHECK_CALL(XGDMatrixNumNonMissing(R_ExternalPtrAddr(handle), &nnz));
  // `indptr` holds offsets up to nnz and is narrowed to int below, so nnz has to
  // be range-checked together with the dimensions.
  if (std::max({nrows, ncols, nnz}) >
      static_cast<bst_ulong>(std::numeric_limits<int>::max())) {
    Rf_error("%s", "Error: resulting DMatrix data does not fit into R 'dgRMatrix'.");
  }

  // All R allocations happen before the C++ staging buffers are created.
  SET_VECTOR_ELT(out, 0, Rf_allocVector(INTSXP, nrows + 1));
  SET_VECTOR_ELT(out, 1, Rf_allocVector(INTSXP, nnz));
  SET_VECTOR_ELT(out, 2, Rf_allocVector(REALSXP, nnz));
  SET_VECTOR_ELT(out, 3, Rf_ScalarInteger(static_cast<int>(ncols)));

  // Staging buffers in the element types XGDMatrixGetDataAsCSR writes; they are
  // converted element-wise into the R vectors afterwards.
  std::unique_ptr<bst_ulong[]> indptr(new bst_ulong[nrows + 1]);
  std::unique_ptr<unsigned[]> indices(new unsigned[nnz]);
  std::unique_ptr<float[]> data(new float[nnz]);

  CHECK_CALL(XGDMatrixGetDataAsCSR(R_ExternalPtrAddr(handle),
                                   "{}",
                                   indptr.get(),
                                   indices.get(),
                                   data.get()));

  std::copy(indptr.get(), indptr.get() + nrows + 1, INTEGER(VECTOR_ELT(out, 0)));
  std::copy(indices.get(), indices.get() + nnz, INTEGER(VECTOR_ELT(out, 1)));
  std::copy(data.get(), data.get() + nnz, REAL(VECTOR_ELT(out, 2)));

  R_API_END();
  Rf_unprotect(1);
  return out;
}
|
||||
|
||||
// functions related to booster
|
||||
void _BoosterFinalizer(SEXP ext) {
|
||||
if (R_ExternalPtrAddr(ext) == NULL) return;
|
||||
|
||||
@@ -143,6 +143,31 @@ XGB_DLL SEXP XGDMatrixNumRow_R(SEXP handle);
|
||||
*/
|
||||
XGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle);
|
||||
|
||||
/*!
|
||||
* \brief return the quantile cuts used for the histogram method
|
||||
* \param handle an instance of data matrix
|
||||
* \return A list with entries 'indptr' and 'data'
|
||||
*/
|
||||
XGB_DLL SEXP XGDMatrixGetQuantileCut_R(SEXP handle);
|
||||
|
||||
/*!
|
||||
* \brief get the number of non-missing entries in a dmatrix
|
||||
* \param handle an instance of data matrix
|
||||
* \return the number of non-missing entries
|
||||
*/
|
||||
XGB_DLL SEXP XGDMatrixNumNonMissing_R(SEXP handle);
|
||||
|
||||
/*!
|
||||
* \brief get the data in a dmatrix in CSR format
|
||||
* \param handle an instance of data matrix
|
||||
* \return R list with the following entries in this order:
|
||||
* - 'indptr'
|
||||
* - 'indices'
|
||||
* - 'data'
|
||||
* - 'ncols'
|
||||
*/
|
||||
XGB_DLL SEXP XGDMatrixGetDataAsCSR_R(SEXP handle);
|
||||
|
||||
/*!
|
||||
* \brief create xgboost learner
|
||||
* \param dmats a list of dmatrix handles that will be cached
|
||||
|
||||
Reference in New Issue
Block a user