diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 0d6c0df71..fa269eae4 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -66,5 +66,5 @@ Imports: methods, data.table (>= 1.9.6), jsonlite (>= 1.0), -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.2 SystemRequirements: GNU make, C++14 diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index fa9473469..62bcd4050 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -592,12 +592,12 @@ cb.cv.predict <- function(save_models = FALSE) { #' #' #### Multiclass classification: #' # -#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2) +#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1) #' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3, -#' lambda = 0.0003, alpha = 0.0003, nthread = 2) +#' lambda = 0.0003, alpha = 0.0003, nthread = 1) #' # For the default linear updater 'shotgun' it sometimes is helpful #' # to use smaller eta to reduce instability -#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 70, eta = 0.5, +#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5, #' callbacks = list(cb.gblinear.history())) #' # Will plot the coefficient paths separately for each class: #' matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l') diff --git a/R-package/man/cb.gblinear.history.Rd b/R-package/man/cb.gblinear.history.Rd index fbbb56dbe..f050fc7f1 100644 --- a/R-package/man/cb.gblinear.history.Rd +++ b/R-package/man/cb.gblinear.history.Rd @@ -72,12 +72,12 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l') #### Multiclass classification: # -dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2) +dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1) param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3, - lambda = 0.0003, alpha = 0.0003, nthread = 2) + lambda = 0.0003, alpha = 0.0003, nthread = 1) # For the default linear updater 'shotgun' it sometimes is helpful # to use smaller eta to reduce instability -bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 70, eta = 0.5, +bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5, callbacks = list(cb.gblinear.history())) # Will plot the coefficient paths separately for each class: matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l') diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index fcbccb145..777a275d7 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -164,33 +164,68 @@ XGB_DLL SEXP XGDMatrixCreateFromCSC_R(SEXP indptr, SEXP indices, SEXP data, return ret; } -XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, - SEXP num_col, SEXP n_threads) { +XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP num_col, + SEXP n_threads) { SEXP ret; R_API_BEGIN(); const int *p_indptr = INTEGER(indptr); const int *p_indices = INTEGER(indices); const double *p_data = REAL(data); - size_t nindptr = static_cast(length(indptr)); - size_t ndata = static_cast(length(data)); - size_t ncol = static_cast(INTEGER(num_col)[0]); - std::vector row_ptr_(nindptr); - std::vector indices_(ndata); - std::vector data_(ndata); - for (size_t i = 0; i < nindptr; ++i) { - row_ptr_[i] = static_cast(p_indptr[i]); + auto nindptr = static_cast(length(indptr)); + auto ndata = static_cast(length(data)); + auto ncol = static_cast(INTEGER(num_col)[0]); + std::int32_t threads = asInteger(n_threads); + + using xgboost::Array; + using xgboost::Integer; + using xgboost::Json; + using xgboost::Object; + using xgboost::String; + // Construct array interfaces + Json jindptr{Object{}}; + Json jindices{Object{}}; + Json jdata{Object{}}; + jindptr["data"] = + Array{std::vector{Json{reinterpret_cast(p_indptr)}, Json{true}}}; + jindptr["shape"] = std::vector{Json{nindptr}}; + jindptr["version"] = Integer{3}; + + jindices["data"] = + Array{std::vector{Json{reinterpret_cast(p_indices)}, Json{true}}}; + jindices["shape"] = std::vector{Json{ndata}}; + jindices["version"] = Integer{3}; + + jdata["data"] = + Array{std::vector{Json{reinterpret_cast(p_data)}, Json{true}}}; + jdata["shape"] = std::vector{Json{ndata}}; + jdata["version"] = Integer{3}; + + if (DMLC_LITTLE_ENDIAN) { + jindptr["typestr"] = String{"i4"}; + jindices["typestr"] = String{">i4"}; + jdata["typestr"] = String{">i8"}; } - int32_t threads = xgboost::common::OmpGetNumThreads(asInteger(n_threads)); - xgboost::common::ParallelFor(ndata, threads, [&](xgboost::omp_ulong i) { - indices_[i] = static_cast(p_indices[i]); - data_[i] = static_cast(p_data[i]); - }); + std::string indptr, indices, data; + Json::Dump(jindptr, &indptr); + Json::Dump(jindices, &indices); + Json::Dump(jdata, &data); + DMatrixHandle handle; - CHECK_CALL(XGDMatrixCreateFromCSREx(BeginPtr(row_ptr_), BeginPtr(indices_), - BeginPtr(data_), nindptr, ndata, - ncol, &handle)); + Json jconfig{Object{}}; + // Construct configuration + jconfig["nthread"] = Integer{threads}; + jconfig["missing"] = xgboost::Number{std::numeric_limits::quiet_NaN()}; + std::string config; + Json::Dump(jconfig, &config); + CHECK_CALL(XGDMatrixCreateFromCSR(indptr.c_str(), indices.c_str(), data.c_str(), ncol, + config.c_str(), &handle)); ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue)); + R_RegisterCFinalizerEx(ret, _DMatrixFinalizer, TRUE); R_API_END(); UNPROTECT(1); diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index c0fb55b1c..f0a8ab6fc 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -397,17 +397,14 @@ XGB_DLL int XGDMatrixCreateFromCSREx(const size_t* indptr, API_END(); } -XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, - char const *indices, char const *data, - xgboost::bst_ulong ncol, - char const* c_json_config, - DMatrixHandle* out) { +XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char const *data, + xgboost::bst_ulong ncol, char const *c_json_config, + DMatrixHandle *out) { API_BEGIN(); xgboost_CHECK_C_ARG_PTR(indptr); xgboost_CHECK_C_ARG_PTR(indices); xgboost_CHECK_C_ARG_PTR(data); - data::CSRArrayAdapter adapter(StringView{indptr}, StringView{indices}, - StringView{data}, ncol); + data::CSRArrayAdapter adapter(StringView{indptr}, StringView{indices}, StringView{data}, ncol); xgboost_CHECK_C_ARG_PTR(c_json_config); auto config = Json::Load(StringView{c_json_config}); float missing = GetMissing(config); diff --git a/tests/ci_build/test_r_package.py b/tests/ci_build/test_r_package.py index 2f9e59c8f..ea166676d 100644 --- a/tests/ci_build/test_r_package.py +++ b/tests/ci_build/test_r_package.py @@ -165,7 +165,10 @@ def check_rmarkdown() -> None: subprocess.check_call([rscript, "-e", "devtools::document()"], env=env) output = subprocess.run(["git", "diff", "--name-only"], capture_output=True) if len(output.stdout.decode("utf-8").strip()) != 0: - raise ValueError("Please run `devtools::document()`.") + output = subprocess.run(["git", "diff"], capture_output=True) + raise ValueError( + "Please run `devtools::document()`. Diff:\n", output.stdout.decode("utf-8") + ) @cd(r_package)