[R] Set number of threads in demos and tests. (#9591)

- Restrict the number of threads in IO.
- Specify the number of threads in demos and tests.
- Add helper scripts for checks.
2023-09-23 21:44:03 +08:00
parent def77870f3
commit cac2cd2e94
51 changed files with 714 additions and 296 deletions
--- a/R-package/man/cb.gblinear.history.Rd
+++ b/R-package/man/cb.gblinear.history.Rd
@@ -35,14 +35,18 @@ Callback function expects the following values to be set in its calling frame:
 }
 \examples{
 #### Binary classification:
-#
+
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
 # without considering the 2nd order interactions:
 x <- model.matrix(Species ~ .^2, iris)[,-1]
 colnames(x)
-dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
+dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
 param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
-              lambda = 0.0003, alpha = 0.0003, nthread = 2)
+              lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 # For 'shotgun', which is a default linear updater, using high eta values may result in
 # unstable behaviour in some datasets. With this simple dataset, however, the high learning
 # rate does not break the convergence, but allows us to illustrate the typical pattern of
@@ -72,9 +76,9 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l')

 #### Multiclass classification:
 #
-dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
+dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
 param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
-              lambda = 0.0003, alpha = 0.0003, nthread = 1)
+              lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 # For the default linear updater 'shotgun' it sometimes is helpful
 # to use smaller eta to reduce instability
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
--- a/R-package/man/predict.xgb.Booster.Rd
+++ b/R-package/man/predict.xgb.Booster.Rd
@@ -132,11 +132,16 @@ Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple th

 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
+
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test

 bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
+               eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
 # use all trees by default
 pred <- predict(bst, test$data)
 # use only the 1st tree
--- a/R-package/man/xgb.DMatrix.Rd
+++ b/R-package/man/xgb.DMatrix.Rd
@@ -38,7 +38,12 @@ Supported input file formats are either a LIBSVM text file or a binary file that
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+dtrain <- with(
+  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+)
 xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
--- a/R-package/man/xgb.config.Rd
+++ b/R-package/man/xgb.config.Rd
@@ -19,10 +19,15 @@ Accessors for model parameters as JSON string.
 }
 \examples{
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
 train <- agaricus.train

-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2,
+  eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+)
 config <- xgb.config(bst)

 }
--- a/R-package/man/xgb.load.Rd
+++ b/R-package/man/xgb.load.Rd
@@ -27,14 +27,23 @@ not \code{xgb.load}.
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
+
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2, eta = 1,
+  nthread = nthread,
+  nrounds = 2,
+  objective = "binary:logistic"
+)
+
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 if (file.exists('xgb.model')) file.remove('xgb.model')
-pred <- predict(bst, test$data)
 }
 \seealso{
 \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
--- a/R-package/man/xgb.model.dt.tree.Rd
+++ b/R-package/man/xgb.model.dt.tree.Rd
@@ -66,9 +66,12 @@ Parse a boosted tree model text dump into a \code{data.table} structure.
 # Basic use:

 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)

 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+               eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")

 (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))

--- a/R-package/man/xgb.plot.deepness.Rd
+++ b/R-package/man/xgb.plot.deepness.Rd
@@ -61,10 +61,13 @@ This function was inspired by the blog post
 \examples{

 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)

-# Change max_depth to a higher number to get a more significant result
+## Change max_depth to a higher number to get a more significant result
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
-               eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
+               eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
               subsample = 0.5, min_child_weight = 2)

 xgb.plot.deepness(bst)
--- a/R-package/man/xgb.plot.importance.Rd
+++ b/R-package/man/xgb.plot.importance.Rd
@@ -77,9 +77,14 @@ with bar colors corresponding to different clusters that have somewhat similar i
 }
 \examples{
 data(agaricus.train)
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)

-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
-               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+bst <- xgboost(
+  data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
+  eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+)

 importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)

--- a/R-package/man/xgb.plot.multi.trees.Rd
+++ b/R-package/man/xgb.plot.multi.trees.Rd
@@ -63,10 +63,15 @@ This function is inspired by this blog post:
 \examples{

 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)

-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
-               eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
-               min_child_weight = 50, verbose = 0)
+bst <- xgboost(
+  data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
+  eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
+  min_child_weight = 50, verbose = 0
+)

 p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
 print(p)
--- a/R-package/man/xgb.plot.shap.Rd
+++ b/R-package/man/xgb.plot.shap.Rd
@@ -124,9 +124,14 @@ a meaningful thing to do.
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')

-bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+nrounds <- 20
+
+bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
               eta = 0.1, max_depth = 3, subsample = .5,
-               method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
+               method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)

 xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
 contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@@ -135,12 +140,11 @@ xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12)  # S

 # multiclass example - plots for each class separately:
 nclass <- 3
-nrounds <- 20
 x <- as.matrix(iris[, -5])
 set.seed(123)
 is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
 mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
-                max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
+                max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
                objective = "multi:softprob", num_class = nclass, verbose = 0)
 trees0 <- seq(from=0, by=nclass, length.out=nrounds)
 col <- rgb(0, 0, 1, 0.5)
--- a/R-package/man/xgb.save.Rd
+++ b/R-package/man/xgb.save.Rd
@@ -31,14 +31,22 @@ releases of XGBoost.
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
+
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2, eta = 1,
+  nthread = nthread,
+  nrounds = 2,
+  objective = "binary:logistic"
+)
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 if (file.exists('xgb.model')) file.remove('xgb.model')
-pred <- predict(bst, test$data)
 }
 \seealso{
 \code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.
--- a/R-package/man/xgb.save.raw.Rd
+++ b/R-package/man/xgb.save.raw.Rd
@@ -25,12 +25,17 @@ Save xgboost model from xgboost or xgb.train
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
+
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+               eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
+
 raw <- xgb.save.raw(bst)
 bst <- xgb.load.raw(raw)
-pred <- predict(bst, test$data)

 }
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -250,7 +250,8 @@ customized objective and evaluation metric functions, therefore it is more flexi
 than the \code{xgboost} interface.

 Parallelization is automatically enabled if \code{OpenMP} is present.
-Number of threads can also be manually specified via \code{nthread} parameter.
+The number of threads can also be specified manually via the \code{nthread}
+parameter.

 The evaluation metric is chosen automatically by XGBoost (according to the objective)
 when the \code{eval_metric} parameter is not provided.
@@ -286,17 +287,25 @@ The following callbacks are automatically created when certain parameters are se
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')

-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
+dtrain <- with(
+  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+)
+dtest <- with(
+  agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
+)
 watchlist <- list(train = dtrain, eval = dtest)

 ## A simple xgb.train example:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
              objective = "binary:logistic", eval_metric = "auc")
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)

-
-## An xgb.train example where custom objective and evaluation metric are used:
+## An xgb.train example where a custom objective and evaluation metric are
+## used:
 logregobj <- function(preds, dtrain) {
   labels <- getinfo(dtrain, "label")
   preds <- 1/(1 + exp(-preds))
@@ -312,12 +321,12 @@ evalerror <- function(preds, dtrain) {

 # These functions could be used by passing them either:
 #  as 'objective' and 'eval_metric' parameters in the params list:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
              objective = logregobj, eval_metric = evalerror)
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)

 #  or through the ... arguments:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
                 objective = logregobj, eval_metric = evalerror)

@@ -327,7 +336,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,


 ## An xgb.train example of using variable learning rates at each iteration:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
              objective = "binary:logistic", eval_metric = "auc")
 my_etas <- list(eta = c(0.5, 0.1))
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@@ -339,7 +348,7 @@ bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,

 ## An 'xgboost' interface example:
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
-               max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
+               max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
               objective = "binary:logistic")
 pred <- predict(bst, agaricus.test$data)