diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index 982350e95..8742a2271 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -21,6 +22,67 @@ #include "./xgboost_R.h" // Must follow other includes. +namespace { +[[nodiscard]] std::string MakeArrayInterfaceFromRMat(SEXP R_mat) { + SEXP mat_dims = Rf_getAttrib(R_mat, R_DimSymbol); + const int *ptr_mat_dims = INTEGER(mat_dims); + + // Lambda for type dispatch. + auto make_matrix = [=](auto const *ptr) { + using namespace xgboost; // NOLINT + using T = std::remove_pointer_t; + + auto m = linalg::MatrixView{ + common::Span{ptr, + static_cast(ptr_mat_dims[0]) * static_cast(ptr_mat_dims[1])}, + {ptr_mat_dims[0], ptr_mat_dims[1]}, // Shape + DeviceOrd::CPU(), + linalg::Order::kF // R uses column-major + }; + CHECK(m.FContiguous()); + return linalg::ArrayInterfaceStr(m); + }; + + const SEXPTYPE arr_type = TYPEOF(R_mat); + switch (arr_type) { + case REALSXP: + return make_matrix(REAL(R_mat)); + case INTSXP: + return make_matrix(INTEGER(R_mat)); + case LGLSXP: + return make_matrix(LOGICAL(R_mat)); + default: + LOG(FATAL) << "Array or matrix has unsupported type."; + } + + LOG(FATAL) << "Not reachable"; + return ""; +} + +[[nodiscard]] std::string MakeJsonConfigForArray(SEXP missing, SEXP n_threads, SEXPTYPE arr_type) { + using namespace ::xgboost; // NOLINT + Json jconfig{Object{}}; + + const SEXPTYPE missing_type = TYPEOF(missing); + if (Rf_isNull(missing) || (missing_type == REALSXP && ISNAN(Rf_asReal(missing))) || + (missing_type == LGLSXP && Rf_asLogical(missing) == R_NaInt) || + (missing_type == INTSXP && Rf_asInteger(missing) == R_NaInt)) { + // missing is not specified + if (arr_type == REALSXP) { + jconfig["missing"] = std::numeric_limits::quiet_NaN(); + } else { + jconfig["missing"] = R_NaInt; + } + } else { + // missing specified + jconfig["missing"] = Rf_asReal(missing); + } + + jconfig["nthread"] = Rf_asInteger(n_threads); + return Json::Dump(jconfig); +} +} // namespace + /*! * \brief macro to annotate begin of api */ @@ -94,47 +156,16 @@ XGB_DLL SEXP XGDMatrixCreateFromFile_R(SEXP fname, SEXP silent) { } XGB_DLL SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP missing, SEXP n_threads) { - SEXP ret; + SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); R_API_BEGIN(); - SEXP dim = getAttrib(mat, R_DimSymbol); - size_t nrow = static_cast(INTEGER(dim)[0]); - size_t ncol = static_cast(INTEGER(dim)[1]); - const bool is_int = TYPEOF(mat) == INTSXP; - double *din; - int *iin; - if (is_int) { - iin = INTEGER(mat); - } else { - din = REAL(mat); - } - std::vector data(nrow * ncol); - xgboost::Context ctx; - ctx.nthread = asInteger(n_threads); - std::int32_t threads = ctx.Threads(); - if (is_int) { - xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) { - for (size_t j = 0; j < ncol; ++j) { - auto v = iin[i + nrow * j]; - if (v == NA_INTEGER) { - data[i * ncol + j] = std::numeric_limits::quiet_NaN(); - } else { - data[i * ncol + j] = static_cast(v); - } - } - }); - } else { - xgboost::common::ParallelFor(nrow, threads, [&](xgboost::omp_ulong i) { - for (size_t j = 0; j < ncol; ++j) { - data[i * ncol + j] = din[i + nrow * j]; - } - }); - } + auto array_str = MakeArrayInterfaceFromRMat(mat); + auto config_str = MakeJsonConfigForArray(missing, n_threads, TYPEOF(mat)); DMatrixHandle handle; - CHECK_CALL(XGDMatrixCreateFromMat_omp(BeginPtr(data), nrow, ncol, - asReal(missing), &handle, threads)); - ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + CHECK_CALL(XGDMatrixCreateFromDense(array_str.c_str(), config_str.c_str(), &handle)); + + R_SetExternalPtrAddr(ret, handle); R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_API_END(); UNPROTECT(1); diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R index 461b7d158..a0cf90088 100644 --- a/R-package/tests/testthat/test_dmatrix.R +++ b/R-package/tests/testthat/test_dmatrix.R @@ -265,3 +265,35 @@ test_that("xgb.DMatrix: print", { }) expect_equal(txt, "xgb.DMatrix dim: 6513 x 126 info: NA colnames: no") }) + +test_that("xgb.DMatrix: Inf as missing", { + x_inf <- matrix(as.numeric(1:10), nrow = 5) + x_inf[2, 1] <- Inf + + x_nan <- x_inf + x_nan[2, 1] <- NA_real_ + + m_inf <- xgb.DMatrix(x_inf, nthread = n_threads, missing = Inf) + xgb.DMatrix.save(m_inf, "inf.dmatrix") + + m_nan <- xgb.DMatrix(x_nan, nthread = n_threads, missing = NA_real_) + xgb.DMatrix.save(m_nan, "nan.dmatrix") + + infconn <- file("inf.dmatrix", "rb") + nanconn <- file("nan.dmatrix", "rb") + + expect_equal(file.size("inf.dmatrix"), file.size("nan.dmatrix")) + + bytes <- file.size("inf.dmatrix") + infdmatrix <- readBin(infconn, "raw", n = bytes) + nandmatrix <- readBin(nanconn, "raw", n = bytes) + + expect_equal(length(infdmatrix), length(nandmatrix)) + expect_equal(infdmatrix, nandmatrix) + + close(infconn) + close(nanconn) + + file.remove("inf.dmatrix") + file.remove("nan.dmatrix") +})