[R] Don't cap global number of threads for serialization (#10028)
This commit is contained in:
parent
edf501d227
commit
6e3c899ba7
@ -56,7 +56,8 @@ Suggests:
|
|||||||
testthat,
|
testthat,
|
||||||
igraph (>= 1.0.1),
|
igraph (>= 1.0.1),
|
||||||
float,
|
float,
|
||||||
titanic
|
titanic,
|
||||||
|
RhpcBLASctl
|
||||||
Depends:
|
Depends:
|
||||||
R (>= 4.3.0)
|
R (>= 4.3.0)
|
||||||
Imports:
|
Imports:
|
||||||
|
|||||||
@ -6,6 +6,7 @@
|
|||||||
#' @param fname the name of the file to write.
|
#' @param fname the name of the file to write.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
|
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
#' fname <- file.path(tempdir(), "xgb.DMatrix.data")
|
#' fname <- file.path(tempdir(), "xgb.DMatrix.data")
|
||||||
|
|||||||
@ -4,7 +4,14 @@
|
|||||||
#' values of one or more global-scope parameters. Use \code{xgb.get.config} to fetch the current
|
#' values of one or more global-scope parameters. Use \code{xgb.get.config} to fetch the current
|
||||||
#' values of all global-scope parameters (listed in
|
#' values of all global-scope parameters (listed in
|
||||||
#' \url{https://xgboost.readthedocs.io/en/stable/parameter.html}).
|
#' \url{https://xgboost.readthedocs.io/en/stable/parameter.html}).
|
||||||
|
#' @details
|
||||||
|
#' Note that serialization-related functions might use a globally-configured number of threads,
|
||||||
|
#' which is managed by the system's OpenMP (OMP) configuration instead. Typically, XGBoost methods
|
||||||
|
#' accept an `nthread` parameter, but some methods like `readRDS` might get executed before such a
|
||||||
|
#' parameter can be supplied.
|
||||||
#'
|
#'
|
||||||
|
#' The number of OMP threads can in turn be configured for example through an environment variable
|
||||||
|
#' `OMP_NUM_THREADS` (needs to be set before R is started), or through `RhpcBLASctl::omp_set_num_threads`.
|
||||||
#' @rdname xgbConfig
|
#' @rdname xgbConfig
|
||||||
#' @title Set and get global configuration
|
#' @title Set and get global configuration
|
||||||
#' @name xgb.set.config, xgb.get.config
|
#' @name xgb.set.config, xgb.get.config
|
||||||
|
|||||||
@ -24,6 +24,7 @@
|
|||||||
#' as a \code{character} vector. Otherwise it will return \code{TRUE}.
|
#' as a \code{character} vector. Otherwise it will return \code{TRUE}.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
|
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' data(agaricus.test, package='xgboost')
|
#' data(agaricus.test, package='xgboost')
|
||||||
#' train <- agaricus.train
|
#' train <- agaricus.train
|
||||||
|
|||||||
@ -20,6 +20,7 @@
|
|||||||
#' \code{\link{xgb.save}}
|
#' \code{\link{xgb.save}}
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
|
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' data(agaricus.test, package='xgboost')
|
#' data(agaricus.test, package='xgboost')
|
||||||
#'
|
#'
|
||||||
|
|||||||
@ -35,6 +35,7 @@
|
|||||||
#' \code{\link{xgb.load}}
|
#' \code{\link{xgb.load}}
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
|
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' data(agaricus.test, package='xgboost')
|
#' data(agaricus.test, package='xgboost')
|
||||||
#'
|
#'
|
||||||
|
|||||||
@ -12,6 +12,7 @@
|
|||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
|
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' data(agaricus.test, package='xgboost')
|
#' data(agaricus.test, package='xgboost')
|
||||||
#'
|
#'
|
||||||
|
|||||||
@ -55,6 +55,8 @@ print(paste("test-error=", err))
|
|||||||
# save model to binary local file
|
# save model to binary local file
|
||||||
xgb.save(bst, "xgboost.model")
|
xgb.save(bst, "xgboost.model")
|
||||||
# load binary model to R
|
# load binary model to R
|
||||||
|
# 'xgb.load' doesn't take an 'nthread' argument, but the number of OpenMP threads can be set like this:
|
||||||
|
RhpcBLASctl::omp_set_num_threads(1)
|
||||||
bst2 <- xgb.load("xgboost.model")
|
bst2 <- xgb.load("xgboost.model")
|
||||||
pred2 <- predict(bst2, test$data)
|
pred2 <- predict(bst2, test$data)
|
||||||
# pred2 should be identical to pred
|
# pred2 should be identical to pred
|
||||||
|
|||||||
@ -15,6 +15,7 @@ xgb.DMatrix.save(dmatrix, fname)
|
|||||||
Save xgb.DMatrix object to binary file
|
Save xgb.DMatrix object to binary file
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
|
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||||
fname <- file.path(tempdir(), "xgb.DMatrix.data")
|
fname <- file.path(tempdir(), "xgb.DMatrix.data")
|
||||||
|
|||||||
@ -44,6 +44,7 @@ as a \code{character} vector. Otherwise it will return \code{TRUE}.
|
|||||||
Dump an xgboost model in text format.
|
Dump an xgboost model in text format.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
|
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
data(agaricus.test, package='xgboost')
|
data(agaricus.test, package='xgboost')
|
||||||
train <- agaricus.train
|
train <- agaricus.train
|
||||||
|
|||||||
@ -25,6 +25,7 @@ Note: a model saved as an R-object, has to be loaded using corresponding R-metho
|
|||||||
not \code{xgb.load}.
|
not \code{xgb.load}.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
|
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
data(agaricus.test, package='xgboost')
|
data(agaricus.test, package='xgboost')
|
||||||
|
|
||||||
|
|||||||
@ -41,6 +41,7 @@ how to persist models in a future-proof way, i.e. to make the model accessible i
|
|||||||
releases of XGBoost.
|
releases of XGBoost.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
|
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
data(agaricus.test, package='xgboost')
|
data(agaricus.test, package='xgboost')
|
||||||
|
|
||||||
|
|||||||
@ -21,6 +21,7 @@ xgb.save.raw(model, raw_format = "ubj")
|
|||||||
Save xgboost model from xgboost or xgb.train
|
Save xgboost model from xgboost or xgb.train
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
|
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
data(agaricus.test, package='xgboost')
|
data(agaricus.test, package='xgboost')
|
||||||
|
|
||||||
|
|||||||
@ -25,6 +25,15 @@ values of one or more global-scope parameters. Use \code{xgb.get.config} to fetc
|
|||||||
values of all global-scope parameters (listed in
|
values of all global-scope parameters (listed in
|
||||||
\url{https://xgboost.readthedocs.io/en/stable/parameter.html}).
|
\url{https://xgboost.readthedocs.io/en/stable/parameter.html}).
|
||||||
}
|
}
|
||||||
|
\details{
|
||||||
|
Note that serialization-related functions might use a globally-configured number of threads,
|
||||||
|
which is managed by the system's OpenMP (OMP) configuration instead. Typically, XGBoost methods
|
||||||
|
accept an \code{nthread} parameter, but some methods like \code{readRDS} might get executed before such a
|
||||||
|
parameter can be supplied.
|
||||||
|
|
||||||
|
The number of OMP threads can in turn be configured for example through an environment variable
|
||||||
|
\code{OMP_NUM_THREADS} (needs to be set before R is started), or through \code{RhpcBLASctl::omp_set_num_threads}.
|
||||||
|
}
|
||||||
\examples{
|
\examples{
|
||||||
# Set verbosity level to silent (0)
|
# Set verbosity level to silent (0)
|
||||||
xgb.set.config(verbosity = 0)
|
xgb.set.config(verbosity = 0)
|
||||||
|
|||||||
@ -20,6 +20,7 @@ pkgs <- c(
|
|||||||
"igraph",
|
"igraph",
|
||||||
"float",
|
"float",
|
||||||
"titanic",
|
"titanic",
|
||||||
|
"RhpcBLASctl",
|
||||||
## imports
|
## imports
|
||||||
"Matrix",
|
"Matrix",
|
||||||
"methods",
|
"methods",
|
||||||
|
|||||||
@ -2,3 +2,4 @@ library(testthat)
|
|||||||
library(xgboost)
|
library(xgboost)
|
||||||
|
|
||||||
test_check("xgboost", reporter = ProgressReporter)
|
test_check("xgboost", reporter = ProgressReporter)
|
||||||
|
RhpcBLASctl::omp_set_num_threads(1)
|
||||||
|
|||||||
@ -496,6 +496,9 @@ An interesting test to see how identical our saved model is to the original one
|
|||||||
|
|
||||||
```{r loadModel, message=F, warning=F}
|
```{r loadModel, message=F, warning=F}
|
||||||
# load binary model to R
|
# load binary model to R
|
||||||
|
# Note that the number of threads for 'xgb.load' is taken from the global OpenMP configuration,
|
||||||
|
# which can be modified like this:
|
||||||
|
RhpcBLASctl::omp_set_num_threads(1)
|
||||||
bst2 <- xgb.load(fname)
|
bst2 <- xgb.load(fname)
|
||||||
xgb.parameters(bst2) <- list(nthread = 2)
|
xgb.parameters(bst2) <- list(nthread = 2)
|
||||||
pred2 <- predict(bst2, test$data)
|
pred2 <- predict(bst2, test$data)
|
||||||
|
|||||||
@ -106,30 +106,13 @@ void GBTreeModel::Load(dmlc::Stream* fi) {
|
|||||||
Validate(*this);
|
Validate(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
|
||||||
std::int32_t IOThreads(Context const* ctx) {
|
|
||||||
CHECK(ctx);
|
|
||||||
std::int32_t n_threads = ctx->Threads();
|
|
||||||
// CRAN checks for number of threads used by examples, but we might not have the right
|
|
||||||
// number of threads when serializing/unserializing models as nthread is a booster
|
|
||||||
// parameter, which is only effective after booster initialization.
|
|
||||||
//
|
|
||||||
// The threshold ratio of CPU time to user time for R is 2.5, we set the number of
|
|
||||||
// threads to 2.
|
|
||||||
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
|
|
||||||
n_threads = std::min(2, n_threads);
|
|
||||||
#endif
|
|
||||||
return n_threads;
|
|
||||||
}
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
void GBTreeModel::SaveModel(Json* p_out) const {
|
void GBTreeModel::SaveModel(Json* p_out) const {
|
||||||
auto& out = *p_out;
|
auto& out = *p_out;
|
||||||
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
|
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
|
||||||
out["gbtree_model_param"] = ToJson(param);
|
out["gbtree_model_param"] = ToJson(param);
|
||||||
std::vector<Json> trees_json(trees.size());
|
std::vector<Json> trees_json(trees.size());
|
||||||
|
|
||||||
common::ParallelFor(trees.size(), IOThreads(ctx_), [&](auto t) {
|
common::ParallelFor(trees.size(), ctx_->Threads(), [&](auto t) {
|
||||||
auto const& tree = trees[t];
|
auto const& tree = trees[t];
|
||||||
Json jtree{Object{}};
|
Json jtree{Object{}};
|
||||||
tree->SaveModel(&jtree);
|
tree->SaveModel(&jtree);
|
||||||
@ -167,7 +150,7 @@ void GBTreeModel::LoadModel(Json const& in) {
|
|||||||
CHECK_EQ(tree_info_json.size(), param.num_trees);
|
CHECK_EQ(tree_info_json.size(), param.num_trees);
|
||||||
tree_info.resize(param.num_trees);
|
tree_info.resize(param.num_trees);
|
||||||
|
|
||||||
common::ParallelFor(param.num_trees, IOThreads(ctx_), [&](auto t) {
|
common::ParallelFor(param.num_trees, ctx_->Threads(), [&](auto t) {
|
||||||
auto tree_id = get<Integer const>(trees_json[t]["id"]);
|
auto tree_id = get<Integer const>(trees_json[t]["id"]);
|
||||||
trees.at(tree_id).reset(new RegTree{});
|
trees.at(tree_id).reset(new RegTree{});
|
||||||
trees[tree_id]->LoadModel(trees_json[t]);
|
trees[tree_id]->LoadModel(trees_json[t]);
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user