merge latest changes

This commit is contained in:
Hui Liu
2023-12-13 21:06:28 -08:00
194 changed files with 4859 additions and 2838 deletions

View File

@@ -5,7 +5,7 @@
#' \code{\link{xgb.DMatrix.save}}).
#'
#' @param data a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object,
#' a \code{dgRMatrix} object (only when making predictions from a fitted model),
#' a \code{dgRMatrix} object,
#' a \code{dsparseVector} object (only when making predictions from a fitted model, will be
#' interpreted as a row vector), or a character string representing a filename.
#' @param info a named list of additional information to store in the \code{xgb.DMatrix} object.

View File

@@ -15,7 +15,7 @@ xgb.DMatrix(
}
\arguments{
\item{data}{a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object,
a \code{dgRMatrix} object (only when making predictions from a fitted model),
a \code{dgRMatrix} object,
a \code{dsparseVector} object (only when making predictions from a fitted model, will be
interpreted as a row vector), or a character string representing a filename.}

View File

@@ -8,6 +8,7 @@
#include <xgboost/data.h>
#include <xgboost/logging.h>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <sstream>
@@ -21,6 +22,118 @@
#include "./xgboost_R.h" // Must follow other includes.
namespace {
/*!
 * \brief Empty exception type used to carry an R unwind request through C++ frames.
 */
struct ErrorWithUnwind : std::exception {};

/*!
 * \brief Cleanup callback in the shape expected by R_UnwindProtect.
 *
 * \param unused Opaque user data slot of the callback; not read.
 * \param jump   When truthy, an ErrorWithUnwind is thrown; otherwise this is a no-op.
 */
void ThrowExceptionFromRError(void *unused, Rboolean jump) {
  if (!jump) {
    return;
  }
  throw ErrorWithUnwind{};
}
/*!
 * \brief Thin carrier that lets a C string travel through a `void*` callback argument.
 */
struct PtrToConstChar {
  const char *ptr;
};

/*!
 * \brief Callback-shaped adapter around Rf_mkChar.
 *
 * \param void_ptr Address of a PtrToConstChar holding the string to convert.
 * \return Whatever Rf_mkChar returns for the wrapped C string.
 */
SEXP WrappedMkChar(void *void_ptr) {
  auto const *payload = static_cast<PtrToConstChar *>(void_ptr);
  return Rf_mkChar(payload->ptr);
}
/*!
 * \brief Run Rf_mkChar under R_UnwindProtect.
 *
 * The cleanup hook is ThrowExceptionFromRError, so an R-side jump surfaces here as an
 * ErrorWithUnwind exception that the caller is expected to catch.
 *
 * \param c_str              C string to convert.
 * \param continuation_token Token forwarded to R_UnwindProtect.
 * \return The value returned by R_UnwindProtect for the wrapped Rf_mkChar call.
 */
SEXP SafeMkChar(const char *c_str, SEXP continuation_token) {
  PtrToConstChar wrapped{c_str};
  void *callback_arg = &wrapped;
  return R_UnwindProtect(WrappedMkChar, callback_arg, ThrowExceptionFromRError, nullptr,
                         continuation_token);
}
/*!
 * \brief Build an array-interface string that describes an R matrix in place.
 *
 * The two extents are read from the object's `dim` attribute and the data pointer is
 * wrapped in a column-major matrix view; REALSXP, INTSXP and LGLSXP inputs are accepted.
 *
 * \param R_mat R matrix object.
 * \return Array-interface string for the matrix view.
 */
[[nodiscard]] std::string MakeArrayInterfaceFromRMat(SEXP R_mat) {
  int const *dims = INTEGER(Rf_getAttrib(R_mat, R_DimSymbol));

  // Generic lambda: one body serves every element pointer type handed in below.
  auto describe = [dims](auto const *values) {
    using namespace xgboost;  // NOLINT
    using Elem = std::remove_pointer_t<decltype(values)>;
    auto const n_rows = static_cast<std::size_t>(dims[0]);
    auto const n_cols = static_cast<std::size_t>(dims[1]);
    auto view = linalg::MatrixView<Elem>{
        common::Span{values, n_rows * n_cols},
        {dims[0], dims[1]},  // Shape
        DeviceOrd::CPU(),
        linalg::Order::kF  // R uses column-major
    };
    CHECK(view.FContiguous());
    return linalg::ArrayInterfaceStr(view);
  };

  SEXPTYPE const elem_type = TYPEOF(R_mat);
  if (elem_type == REALSXP) {
    return describe(REAL(R_mat));
  }
  if (elem_type == INTSXP) {
    return describe(INTEGER(R_mat));
  }
  if (elem_type == LGLSXP) {
    return describe(LOGICAL(R_mat));
  }
  LOG(FATAL) << "Array or matrix has unsupported type.";
  // Fallback path, taken only if the fatal log above returns control.
  LOG(FATAL) << "Not reachable";
  return "";
}
/*!
 * \brief Build an array-interface string that describes an R vector in place.
 *
 * The length comes from Rf_xlength; REALSXP, INTSXP and LGLSXP inputs are accepted.
 *
 * \param R_vec R vector object.
 * \return Array-interface string for the vector view.
 */
[[nodiscard]] std::string MakeArrayInterfaceFromRVector(SEXP R_vec) {
  size_t const n_elems = Rf_xlength(R_vec);

  // Generic lambda: one body serves every element pointer type handed in below.
  auto describe = [n_elems](auto const *values) {
    using namespace xgboost;  // NOLINT
    auto view = linalg::MakeVec(values, n_elems);
    return linalg::ArrayInterfaceStr(view);
  };

  SEXPTYPE const elem_type = TYPEOF(R_vec);
  if (elem_type == REALSXP) {
    return describe(REAL(R_vec));
  }
  if (elem_type == INTSXP) {
    return describe(INTEGER(R_vec));
  }
  if (elem_type == LGLSXP) {
    return describe(LOGICAL(R_vec));
  }
  LOG(FATAL) << "Array or matrix has unsupported type.";
  // Fallback path, taken only if the fatal log above returns control.
  LOG(FATAL) << "Not reachable";
  return "";
}
/*!
 * \brief Serialise the `missing` / `nthread` settings for an array-based constructor.
 *
 * \param missing   R scalar naming the missing-value marker; NULL or an NA of type
 *                  double, logical or integer is treated as "not specified".
 * \param n_threads R scalar converted with Rf_asInteger and stored under "nthread".
 * \param arr_type  SEXP type of the array; selects the default marker when `missing`
 *                  is not specified (NaN for REALSXP, R_NaInt otherwise).
 * \return JSON text holding the "missing" and "nthread" entries.
 */
[[nodiscard]] std::string MakeJsonConfigForArray(SEXP missing, SEXP n_threads, SEXPTYPE arr_type) {
  using namespace ::xgboost;  // NOLINT
  Json jconfig{Object{}};

  SEXPTYPE const missing_type = TYPEOF(missing);
  // True when the caller left `missing` at NULL or passed an NA of a supported type.
  auto const missing_unset = [&]() -> bool {
    if (Rf_isNull(missing)) {
      return true;
    }
    switch (missing_type) {
      case REALSXP:
        return ISNAN(Rf_asReal(missing));
      case LGLSXP:
        return Rf_asLogical(missing) == R_NaInt;
      case INTSXP:
        return Rf_asInteger(missing) == R_NaInt;
      default:
        return false;
    }
  }();

  if (!missing_unset) {
    // missing specified
    jconfig["missing"] = Rf_asReal(missing);
  } else if (arr_type == REALSXP) {
    // missing is not specified, floating-point data
    jconfig["missing"] = std::numeric_limits<double>::quiet_NaN();
  } else {
    // missing is not specified, integer-like data
    jconfig["missing"] = R_NaInt;
  }

  jconfig["nthread"] = Rf_asInteger(n_threads);
  return Json::Dump(jconfig);
}
} // namespace
/*!
* \brief macro to annotate begin of api
*/
@@ -47,13 +160,6 @@
using dmlc::BeginPtr;
/*!
 * \brief Fetch the context object of the DMatrix behind an opaque handle.
 *
 * \param handle Opaque handle, interpreted as a pointer to std::shared_ptr<xgboost::DMatrix>.
 * \return The pointer returned by the DMatrix's Ctx() accessor.
 */
xgboost::Context const *DMatrixCtx(DMatrixHandle handle) {
  CHECK_HANDLE();
  auto *shared_dmat = static_cast<std::shared_ptr<xgboost::DMatrix> *>(handle);
  CHECK(shared_dmat);
  return (*shared_dmat)->Ctx();
}
/*!
 * \brief Report whether an R external pointer currently holds a null address.
 *
 * \param handle R external pointer object.
 * \return R logical scalar: TRUE when the stored address is null, FALSE otherwise.
 */
XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle) {
  const bool address_is_null = (R_ExternalPtrAddr(handle) == nullptr);
  return ScalarLogical(address_is_null);
}
@@ -82,11 +188,11 @@ XGB_DLL SEXP XGBGetGlobalConfig_R() {
}
XGB_DLL SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
SEXP ret;
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
R_API_BEGIN();
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromFile(CHAR(asChar(fname)), asInteger(silent), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
R_API_END();
UNPROTECT(1);
@@ -94,47 +200,19 @@ XGB_DLL SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) {
}
XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) {
SEXP ret;
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
R_API_BEGIN();
SEXP dim = getAttrib(mat, R_DimSymbol);
size_t nrow = static_cast<size_t>(INTEGER(dim)[0]);
size_t ncol = static_cast<size_t>(INTEGER(dim)[1]);
const bool is_int = TYPEOF(mat) == INTSXP;
double *din;
int *iin;
if (is_int) {
iin = INTEGER(mat);
} else {
din = REAL(mat);
}
std::vector<float> data(nrow * ncol);
xgboost::Context ctx;
ctx.nthread = asInteger(n_threads);
std::int32_t threads = ctx.Threads();
if (is_int) {
xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
for (size_t j = 0; j < ncol; ++j) {
auto v = iin[i + nrow * j];
if (v == NA_INTEGER) {
data[i * ncol + j] = std::numeric_limits<float>::quiet_NaN();
} else {
data[i * ncol + j] = static_cast<float>(v);
}
}
});
} else {
xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) {
for (size_t j = 0; j < ncol; ++j) {
data[i * ncol + j] = din[i + nrow * j];
}
});
}
DMatrixHandle handle;
CHECK_CALL(XGDMatrixCreateFromMat_omp(BeginPtr(data), nrow, ncol,
asReal(missing), &handle, threads));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
int res_code;
{
auto array_str = MakeArrayInterfaceFromRMat(mat);
auto config_str = MakeJsonConfigForArray(missing, n_threads, TYPEOF(mat));
res_code = XGDMatrixCreateFromDense(array_str.c_str(), config_str.c_str(), &handle);
}
CHECK_CALL(res_code);
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
R_API_END();
UNPROTECT(1);
@@ -148,8 +226,8 @@ void CreateFromSparse(SEXP indptr, SEXP indices, SEXP data, std::string *indptr_
const int *p_indices = INTEGER(indices);
const double *p_data = REAL(data);
auto nindptr = static_cast<std::size_t>(length(indptr));
auto ndata = static_cast<std::size_t>(length(data));
auto nindptr = static_cast<std::size_t>(Rf_xlength(indptr));
auto ndata = static_cast<std::size_t>(Rf_xlength(data));
CHECK_EQ(ndata, p_indptr[nindptr - 1]);
xgboost::detail::MakeSparseFromPtr(p_indptr, p_indices, p_data, nindptr, indptr_str, indices_str,
data_str);
@@ -158,30 +236,32 @@ void CreateFromSparse(SEXP indptr, SEXP indices, SEXP data, std::string *indptr_
XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_row,
SEXP missing, SEXP n_threads) {
SEXP ret;
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
R_API_BEGIN();
std::int32_t threads = asInteger(n_threads);
using xgboost::Integer;
using xgboost::Json;
using xgboost::Object;
std::string sindptr, sindices, sdata;
CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);
auto nrow = static_cast<std::size_t>(INTEGER(num_row)[0]);
DMatrixHandle handle;
Json jconfig{Object{}};
// Construct configuration
jconfig["nthread"] = Integer{threads};
jconfig["missing"] = xgboost::Number{asReal(missing)};
std::string config;
Json::Dump(jconfig, &config);
CHECK_CALL(XGDMatrixCreateFromCSC(sindptr.c_str(), sindices.c_str(), sdata.c_str(), nrow,
config.c_str(), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
int res_code;
{
using xgboost::Integer;
using xgboost::Json;
using xgboost::Object;
std::string sindptr, sindices, sdata;
CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);
auto nrow = static_cast<std::size_t>(INTEGER(num_row)[0]);
Json jconfig{Object{}};
// Construct configuration
jconfig["nthread"] = Integer{threads};
jconfig["missing"] = xgboost::Number{asReal(missing)};
std::string config;
Json::Dump(jconfig, &config);
res_code = XGDMatrixCreateFromCSC(sindptr.c_str(), sindices.c_str(), sdata.c_str(), nrow,
config.c_str(), &handle);
}
CHECK_CALL(res_code);
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
R_API_END();
UNPROTECT(1);
@@ -190,29 +270,31 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, SEXP
XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_col,
SEXP missing, SEXP n_threads) {
SEXP ret;
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
R_API_BEGIN();
std::int32_t threads = asInteger(n_threads);
using xgboost::Integer;
using xgboost::Json;
using xgboost::Object;
std::string sindptr, sindices, sdata;
CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);
auto ncol = static_cast<std::size_t>(INTEGER(num_col)[0]);
DMatrixHandle handle;
Json jconfig{Object{}};
// Construct configuration
jconfig["nthread"] = Integer{threads};
jconfig["missing"] = xgboost::Number{asReal(missing)};
std::string config;
Json::Dump(jconfig, &config);
CHECK_CALL(XGDMatrixCreateFromCSR(sindptr.c_str(), sindices.c_str(), sdata.c_str(), ncol,
config.c_str(), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
int res_code;
{
  // The std::string / Json temporaries live only inside this scope; the result code is
  // inspected after the scope has closed.
  // NOTE(review): presumably so their destructors run before CHECK_CALL can raise an
  // R error -- confirm against the CHECK_CALL definition.
  using xgboost::Integer;
  using xgboost::Json;
  using xgboost::Object;
  std::string sindptr, sindices, sdata;
  CreateFromSparse(indptr, indices, data, &sindptr, &sindices, &sdata);
  auto ncol = static_cast<std::size_t>(INTEGER(num_col)[0]);
  Json jconfig{Object{}};
  // Construct configuration
  jconfig["nthread"] = Integer{threads};
  jconfig["missing"] = xgboost::Number{asReal(missing)};
  std::string config;
  Json::Dump(jconfig, &config);
  res_code = XGDMatrixCreateFromCSR(sindptr.c_str(), sindices.c_str(), sdata.c_str(), ncol,
                                    config.c_str(), &handle);
}
// Check the result before publishing `handle`, exactly as the CSC constructor does.
// Without this check a failed call would store a possibly-unset handle in `ret` and
// register a finalizer on it.
CHECK_CALL(res_code);
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
R_API_END();
UNPROTECT(1);
@@ -220,19 +302,28 @@ XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP
}
XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
SEXP ret;
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
R_API_BEGIN();
int len = length(idxset);
std::vector<int> idxvec(len);
for (int i = 0; i < len; ++i) {
idxvec[i] = INTEGER(idxset)[i] - 1;
}
R_xlen_t len = Rf_xlength(idxset);
const int *idxset_ = INTEGER(idxset);
DMatrixHandle res;
CHECK_CALL(XGDMatrixSliceDMatrixEx(R_ExternalPtrAddr(handle),
BeginPtr(idxvec), len,
&res,
0));
ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
int res_code;
{
std::vector<int> idxvec(len);
#ifndef _MSC_VER
#pragma omp simd
#endif
for (R_xlen_t i = 0; i < len; ++i) {
idxvec[i] = idxset_[i] - 1;
}
res_code = XGDMatrixSliceDMatrixEx(R_ExternalPtrAddr(handle),
BeginPtr(idxvec), len,
&res,
0);
}
CHECK_CALL(res_code);
R_SetExternalPtrAddr(ret, res);
R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE);
R_API_END();
UNPROTECT(1);
@@ -250,23 +341,15 @@ XGB_DLL SEXP XGDMatrixSaveBinary_R(SEXP handle, SEXP fname, SEXP silent) {
XGB_DLL SEXP XGDMatrixSetInfo_R(SEXP handle, SEXP field, SEXP array) {
R_API_BEGIN();
int len = length(array);
const char *name = CHAR(asChar(field));
auto ctx = DMatrixCtx(R_ExternalPtrAddr(handle));
if (!strcmp("group", name)) {
std::vector<unsigned> vec(len);
xgboost::common::ParallelFor(len, ctx->Threads(), [&](xgboost::omp_ulong i) {
vec[i] = static_cast<unsigned>(INTEGER(array)[i]);
});
CHECK_CALL(
XGDMatrixSetUIntInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), BeginPtr(vec), len));
} else {
std::vector<float> vec(len);
xgboost::common::ParallelFor(len, ctx->Threads(),
[&](xgboost::omp_ulong i) { vec[i] = REAL(array)[i]; });
CHECK_CALL(
XGDMatrixSetFloatInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), BeginPtr(vec), len));
SEXP field_ = PROTECT(Rf_asChar(field));
int res_code;
{
const std::string array_str = MakeArrayInterfaceFromRVector(array);
res_code = XGDMatrixSetInfoFromInterface(
R_ExternalPtrAddr(handle), CHAR(field_), array_str.c_str());
}
CHECK_CALL(res_code);
UNPROTECT(1);
R_API_END();
return R_NilValue;
}
@@ -275,18 +358,30 @@ XGB_DLL SEXP XGDMatrixSetStrFeatureInfo_R(SEXP handle, SEXP field, SEXP array) {
R_API_BEGIN();
size_t len{0};
if (!isNull(array)) {
len = length(array);
len = Rf_xlength(array);
}
const char *name = CHAR(asChar(field));
std::vector<std::string> str_info;
SEXP str_info_holder = PROTECT(Rf_allocVector(VECSXP, len));
for (size_t i = 0; i < len; ++i) {
str_info.emplace_back(CHAR(asChar(VECTOR_ELT(array, i))));
SET_VECTOR_ELT(str_info_holder, i, Rf_asChar(VECTOR_ELT(array, i)));
}
std::vector<char const*> vec(len);
std::transform(str_info.cbegin(), str_info.cend(), vec.begin(),
[](std::string const &str) { return str.c_str(); });
CHECK_CALL(XGDMatrixSetStrFeatureInfo(R_ExternalPtrAddr(handle), name, vec.data(), len));
SEXP field_ = PROTECT(Rf_asChar(field));
const char *name = CHAR(field_);
int res_code;
{
std::vector<std::string> str_info;
str_info.reserve(len);
for (size_t i = 0; i < len; ++i) {
str_info.emplace_back(CHAR(VECTOR_ELT(str_info_holder, i)));
}
std::vector<char const*> vec(len);
std::transform(str_info.cbegin(), str_info.cend(), vec.begin(),
[](std::string const &str) { return str.c_str(); });
res_code = XGDMatrixSetStrFeatureInfo(R_ExternalPtrAddr(handle), name, vec.data(), len);
}
CHECK_CALL(res_code);
UNPROTECT(2);
R_API_END();
return R_NilValue;
}
@@ -319,8 +414,9 @@ XGB_DLL SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field) {
const float *res;
CHECK_CALL(XGDMatrixGetFloatInfo(R_ExternalPtrAddr(handle), CHAR(asChar(field)), &olen, &res));
ret = PROTECT(allocVector(REALSXP, olen));
double *ret_ = REAL(ret);
for (size_t i = 0; i < olen; ++i) {
REAL(ret)[i] = res[i];
ret_[i] = res[i];
}
R_API_END();
UNPROTECT(1);
@@ -351,16 +447,21 @@ void _BoosterFinalizer(SEXP ext) {
}
XGB_DLL SEXP XGBoosterCreate_R(SEXP dmats) {
SEXP ret;
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
R_API_BEGIN();
int len = length(dmats);
std::vector<void*> dvec;
for (int i = 0; i < len; ++i) {
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
}
R_xlen_t len = Rf_xlength(dmats);
BoosterHandle handle;
CHECK_CALL(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
int res_code;
{
std::vector<void*> dvec(len);
for (R_xlen_t i = 0; i < len; ++i) {
dvec[i] = R_ExternalPtrAddr(VECTOR_ELT(dmats, i));
}
res_code = XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle);
}
CHECK_CALL(res_code);
R_SetExternalPtrAddr(ret, handle);
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
R_API_END();
UNPROTECT(1);
@@ -369,13 +470,18 @@ XGB_DLL SEXP XGBoosterCreate_R(SEXP dmats) {
XGB_DLL SEXP XGBoosterCreateInEmptyObj_R(SEXP dmats, SEXP R_handle) {
R_API_BEGIN();
int len = length(dmats);
std::vector<void*> dvec;
for (int i = 0; i < len; ++i) {
dvec.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
}
R_xlen_t len = Rf_xlength(dmats);
BoosterHandle handle;
CHECK_CALL(XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle));
int res_code;
{
std::vector<void*> dvec(len);
for (R_xlen_t i = 0; i < len; ++i) {
dvec[i] = R_ExternalPtrAddr(VECTOR_ELT(dmats, i));
}
res_code = XGBoosterCreate(BeginPtr(dvec), dvec.size(), &handle);
}
CHECK_CALL(res_code);
R_SetExternalPtrAddr(R_handle, handle);
R_RegisterCFinalizerEx(R_handle, _BoosterFinalizer, TRUE);
R_API_END();
@@ -384,9 +490,12 @@ XGB_DLL SEXP XGBoosterCreateInEmptyObj_R(SEXP dmats, SEXP R_handle) {
XGB_DLL SEXP XGBoosterSetParam_R(SEXP handle, SEXP name, SEXP val) {
R_API_BEGIN();
SEXP name_ = PROTECT(Rf_asChar(name));
SEXP val_ = PROTECT(Rf_asChar(val));
CHECK_CALL(XGBoosterSetParam(R_ExternalPtrAddr(handle),
CHAR(asChar(name)),
CHAR(asChar(val))));
CHAR(name_),
CHAR(val_)));
UNPROTECT(2);
R_API_END();
return R_NilValue;
}
@@ -402,7 +511,7 @@ XGB_DLL SEXP XGBoosterUpdateOneIter_R(SEXP handle, SEXP iter, SEXP dtrain) {
XGB_DLL SEXP XGBoosterTrainOneIter_R(SEXP handle, SEXP dtrain, SEXP iter, SEXP grad, SEXP hess) {
R_API_BEGIN();
CHECK_EQ(length(grad), length(hess)) << "gradient and hess must have same length.";
CHECK_EQ(Rf_xlength(grad), Rf_xlength(hess)) << "gradient and hess must have same length.";
SEXP gdim = getAttrib(grad, R_DimSymbol);
auto n_samples = static_cast<std::size_t>(INTEGER(gdim)[0]);
auto n_targets = static_cast<std::size_t>(INTEGER(gdim)[1]);
@@ -413,11 +522,15 @@ XGB_DLL SEXP XGBoosterTrainOneIter_R(SEXP handle, SEXP dtrain, SEXP iter, SEXP g
double const *d_grad = REAL(grad);
double const *d_hess = REAL(hess);
auto ctx = xgboost::detail::BoosterCtx(R_ExternalPtrAddr(handle));
auto [s_grad, s_hess] = xgboost::detail::MakeGradientInterface(
ctx, d_grad, d_hess, xgboost::linalg::kF, n_samples, n_targets);
CHECK_CALL(XGBoosterTrainOneIter(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dtrain),
asInteger(iter), s_grad.c_str(), s_hess.c_str()));
int res_code;
{
auto ctx = xgboost::detail::BoosterCtx(R_ExternalPtrAddr(handle));
auto [s_grad, s_hess] = xgboost::detail::MakeGradientInterface(
ctx, d_grad, d_hess, xgboost::linalg::kF, n_samples, n_targets);
res_code = XGBoosterTrainOneIter(R_ExternalPtrAddr(handle), R_ExternalPtrAddr(dtrain),
asInteger(iter), s_grad.c_str(), s_hess.c_str());
}
CHECK_CALL(res_code);
R_API_END();
return R_NilValue;
@@ -426,24 +539,35 @@ XGB_DLL SEXP XGBoosterTrainOneIter_R(SEXP handle, SEXP dtrain, SEXP iter, SEXP g
XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
const char *ret;
R_API_BEGIN();
CHECK_EQ(length(dmats), length(evnames))
CHECK_EQ(Rf_xlength(dmats), Rf_xlength(evnames))
<< "dmats and evnams must have same length";
int len = length(dmats);
std::vector<void*> vec_dmats;
std::vector<std::string> vec_names;
std::vector<const char*> vec_sptr;
for (int i = 0; i < len; ++i) {
vec_dmats.push_back(R_ExternalPtrAddr(VECTOR_ELT(dmats, i)));
vec_names.emplace_back(CHAR(asChar(VECTOR_ELT(evnames, i))));
R_xlen_t len = Rf_xlength(dmats);
SEXP evnames_lst = PROTECT(Rf_allocVector(VECSXP, len));
for (R_xlen_t i = 0; i < len; i++) {
SET_VECTOR_ELT(evnames_lst, i, Rf_asChar(VECTOR_ELT(evnames, i)));
}
for (int i = 0; i < len; ++i) {
vec_sptr.push_back(vec_names[i].c_str());
int res_code;
{
std::vector<void*> vec_dmats(len);
std::vector<std::string> vec_names;
vec_names.reserve(len);
std::vector<const char*> vec_sptr(len);
for (R_xlen_t i = 0; i < len; ++i) {
vec_dmats[i] = R_ExternalPtrAddr(VECTOR_ELT(dmats, i));
vec_names.emplace_back(CHAR(VECTOR_ELT(evnames_lst, i)));
}
for (R_xlen_t i = 0; i < len; ++i) {
vec_sptr[i] = vec_names[i].c_str();
}
res_code = XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
BeginPtr(vec_dmats),
BeginPtr(vec_sptr),
len, &ret);
}
CHECK_CALL(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
BeginPtr(vec_dmats),
BeginPtr(vec_sptr),
len, &ret));
CHECK_CALL(res_code);
UNPROTECT(1);
R_API_END();
return mkString(ret);
}
@@ -451,10 +575,11 @@ XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evn
XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_config) {
SEXP r_out_shape;
SEXP r_out_result;
SEXP r_out;
SEXP r_out = PROTECT(allocVector(VECSXP, 2));
SEXP json_config_ = PROTECT(Rf_asChar(json_config));
R_API_BEGIN();
char const *c_json_config = CHAR(asChar(json_config));
char const *c_json_config = CHAR(json_config_);
bst_ulong out_dim;
bst_ulong const *out_shape;
@@ -465,23 +590,19 @@ XGB_DLL SEXP XGBoosterPredictFromDMatrix_R(SEXP handle, SEXP dmat, SEXP json_con
r_out_shape = PROTECT(allocVector(INTSXP, out_dim));
size_t len = 1;
int *r_out_shape_ = INTEGER(r_out_shape);
for (size_t i = 0; i < out_dim; ++i) {
INTEGER(r_out_shape)[i] = out_shape[i];
r_out_shape_[i] = out_shape[i];
len *= out_shape[i];
}
r_out_result = PROTECT(allocVector(REALSXP, len));
auto ctx = xgboost::detail::BoosterCtx(R_ExternalPtrAddr(handle));
xgboost::common::ParallelFor(len, ctx->Threads(), [&](xgboost::omp_ulong i) {
REAL(r_out_result)[i] = out_result[i];
});
r_out = PROTECT(allocVector(VECSXP, 2));
std::copy(out_result, out_result + len, REAL(r_out_result));
SET_VECTOR_ELT(r_out, 0, r_out_shape);
SET_VECTOR_ELT(r_out, 1, r_out_result);
R_API_END();
UNPROTECT(3);
UNPROTECT(4);
return r_out;
}
@@ -504,7 +625,7 @@ XGB_DLL SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) {
R_API_BEGIN();
CHECK_CALL(XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle),
RAW(raw),
length(raw)));
Rf_xlength(raw)));
R_API_END();
return R_NilValue;
}
@@ -562,45 +683,54 @@ XGB_DLL SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw) {
R_API_BEGIN();
CHECK_CALL(XGBoosterUnserializeFromBuffer(R_ExternalPtrAddr(handle),
RAW(raw),
length(raw)));
Rf_xlength(raw)));
R_API_END();
return R_NilValue;
}
XGB_DLL SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats, SEXP dump_format) {
SEXP out;
SEXP continuation_token = PROTECT(R_MakeUnwindCont());
SEXP dump_format_ = PROTECT(Rf_asChar(dump_format));
SEXP fmap_ = PROTECT(Rf_asChar(fmap));
R_API_BEGIN();
bst_ulong olen;
const char **res;
const char *fmt = CHAR(asChar(dump_format));
const char *fmt = CHAR(dump_format_);
CHECK_CALL(XGBoosterDumpModelEx(R_ExternalPtrAddr(handle),
CHAR(asChar(fmap)),
CHAR(fmap_),
asInteger(with_stats),
fmt,
&olen, &res));
out = PROTECT(allocVector(STRSXP, olen));
if (!strcmp("json", fmt)) {
std::stringstream stream;
stream << "[\n";
for (size_t i = 0; i < olen; ++i) {
stream << res[i];
if (i < olen - 1) {
stream << ",\n";
} else {
stream << "\n";
try {
if (!strcmp("json", fmt)) {
std::stringstream stream;
stream << "[\n";
for (size_t i = 0; i < olen; ++i) {
stream << res[i];
if (i < olen - 1) {
stream << ",\n";
} else {
stream << "\n";
}
}
stream << "]";
const std::string temp_str = stream.str();
SET_STRING_ELT(out, 0, SafeMkChar(temp_str.c_str(), continuation_token));
} else {
for (size_t i = 0; i < olen; ++i) {
std::stringstream stream;
stream << "booster[" << i <<"]\n" << res[i];
const std::string temp_str = stream.str();
SET_STRING_ELT(out, i, SafeMkChar(temp_str.c_str(), continuation_token));
}
}
stream << "]";
SET_STRING_ELT(out, 0, mkChar(stream.str().c_str()));
} else {
for (size_t i = 0; i < olen; ++i) {
std::stringstream stream;
stream << "booster[" << i <<"]\n" << res[i];
SET_STRING_ELT(out, i, mkChar(stream.str().c_str()));
}
} catch (ErrorWithUnwind &e) {
R_ContinueUnwind(continuation_token);
}
R_API_END();
UNPROTECT(1);
UNPROTECT(4);
return out;
}
@@ -626,9 +756,19 @@ XGB_DLL SEXP XGBoosterGetAttr_R(SEXP handle, SEXP name) {
XGB_DLL SEXP XGBoosterSetAttr_R(SEXP handle, SEXP name, SEXP val) {
R_API_BEGIN();
const char *v = isNull(val) ? nullptr : CHAR(asChar(val));
const char *v = nullptr;
SEXP name_ = PROTECT(Rf_asChar(name));
SEXP val_;
int n_protected = 1;
if (!Rf_isNull(val)) {
val_ = PROTECT(Rf_asChar(val));
n_protected++;
v = CHAR(val_);
}
CHECK_CALL(XGBoosterSetAttr(R_ExternalPtrAddr(handle),
CHAR(asChar(name)), v));
CHAR(name_), v));
UNPROTECT(n_protected);
R_API_END();
return R_NilValue;
}
@@ -657,7 +797,7 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {
SEXP out_features_sexp;
SEXP out_scores_sexp;
SEXP out_shape_sexp;
SEXP r_out;
SEXP r_out = PROTECT(allocVector(VECSXP, 3));
R_API_BEGIN();
char const *c_json_config = CHAR(asChar(json_config));
@@ -673,23 +813,20 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {
&out_dim, &out_shape, &out_scores));
out_shape_sexp = PROTECT(allocVector(INTSXP, out_dim));
size_t len = 1;
int *out_shape_sexp_ = INTEGER(out_shape_sexp);
for (size_t i = 0; i < out_dim; ++i) {
INTEGER(out_shape_sexp)[i] = out_shape[i];
out_shape_sexp_[i] = out_shape[i];
len *= out_shape[i];
}
out_scores_sexp = PROTECT(allocVector(REALSXP, len));
auto ctx = xgboost::detail::BoosterCtx(R_ExternalPtrAddr(handle));
xgboost::common::ParallelFor(len, ctx->Threads(), [&](xgboost::omp_ulong i) {
REAL(out_scores_sexp)[i] = out_scores[i];
});
out_features_sexp = PROTECT(allocVector(STRSXP, out_n_features));
for (size_t i = 0; i < out_n_features; ++i) {
SET_STRING_ELT(out_features_sexp, i, mkChar(out_features[i]));
}
r_out = PROTECT(allocVector(VECSXP, 3));
out_scores_sexp = PROTECT(allocVector(REALSXP, len));
std::copy(out_scores, out_scores + len, REAL(out_scores_sexp));
SET_VECTOR_ELT(r_out, 0, out_features_sexp);
SET_VECTOR_ELT(r_out, 1, out_shape_sexp);
SET_VECTOR_ELT(r_out, 2, out_scores_sexp);

View File

@@ -265,3 +265,35 @@ test_that("xgb.DMatrix: print", {
})
expect_equal(txt, "xgb.DMatrix dim: 6513 x 126 info: NA colnames: no")
})
test_that("xgb.DMatrix: Inf as missing", {
  # Same 5 x 2 numeric matrix twice; only the encoding of cell [2, 1] differs.
  mat_with_inf <- matrix(as.numeric(1:10), nrow = 5)
  mat_with_inf[2, 1] <- Inf
  mat_with_na <- mat_with_inf
  mat_with_na[2, 1] <- NA_real_

  inf_path <- "inf.dmatrix"
  nan_path <- "nan.dmatrix"

  # Build one DMatrix with Inf declared as the missing marker and one with NA,
  # then save both to disk.
  dm_inf <- xgb.DMatrix(mat_with_inf, nthread = n_threads, missing = Inf)
  xgb.DMatrix.save(dm_inf, inf_path)
  dm_nan <- xgb.DMatrix(mat_with_na, nthread = n_threads, missing = NA_real_)
  xgb.DMatrix.save(dm_nan, nan_path)

  inf_con <- file(inf_path, "rb")
  nan_con <- file(nan_path, "rb")

  # The two saved files must have the same size ...
  expect_equal(file.size(inf_path), file.size(nan_path))

  n_bytes <- file.size(inf_path)
  raw_inf <- readBin(inf_con, "raw", n = n_bytes)
  raw_nan <- readBin(nan_con, "raw", n = n_bytes)

  # ... and the same bytes.
  expect_equal(length(raw_inf), length(raw_nan))
  expect_equal(raw_inf, raw_nan)

  close(inf_con)
  close(nan_con)
  file.remove(inf_path)
  file.remove(nan_path)
})