[R] Set number of threads in demos and tests. (#9591)

- Restrict the number of threads in IO.
- Specify the number of threads in demos and tests.
- Add helper scripts for checks.
This commit is contained in:
Jiaming Yuan
2023-09-23 21:44:03 +08:00
committed by GitHub
parent def77870f3
commit cac2cd2e94
51 changed files with 714 additions and 296 deletions

View File

@@ -35,14 +35,18 @@ Callback function expects the following values to be set in its calling frame:
}
\examples{
#### Binary classification:
#
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
# In the iris dataset, it is hard to linearly separate Versicolor class from the rest
# without considering the 2nd order interactions:
x <- model.matrix(Species ~ .^2, iris)[,-1]
colnames(x)
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
lambda = 0.0003, alpha = 0.0003, nthread = 2)
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
# For 'shotgun', which is a default linear updater, using high eta values may result in
# unstable behaviour in some datasets. With this simple dataset, however, the high learning
# rate does not break the convergence, but allows us to illustrate the typical pattern of
@@ -72,9 +76,9 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
#### Multiclass classification:
#
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
lambda = 0.0003, alpha = 0.0003, nthread = 1)
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
# For the default linear updater 'shotgun' it sometimes is helpful
# to use smaller eta to reduce instability
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,

View File

@@ -132,11 +132,16 @@ Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple th
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
# use all trees by default
pred <- predict(bst, test$data)
# use only the 1st tree

View File

@@ -38,7 +38,12 @@ Supported input file formats are either a LIBSVM text file or a binary file that
}
\examples{
data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
dtrain <- with(
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
)
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')

View File

@@ -19,10 +19,15 @@ Accessors for model parameters as JSON string.
}
\examples{
data(agaricus.train, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
)
config <- xgb.config(bst)
}

View File

@@ -27,14 +27,23 @@ not \code{xgb.load}.
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2, eta = 1,
nthread = nthread,
nrounds = 2,
objective = "binary:logistic"
)
xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
if (file.exists('xgb.model')) file.remove('xgb.model')
pred <- predict(bst, test$data)
}
\seealso{
\code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.

View File

@@ -66,9 +66,12 @@ Parse a boosted tree model text dump into a \code{data.table} structure.
# Basic use:
data(agaricus.train, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
(dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))

View File

@@ -61,10 +61,13 @@ This function was inspired by the blog post
\examples{
data(agaricus.train, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
# Change max_depth to a higher number to get a more significant result
## Change max_depth to a higher number to get a more significant result
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
subsample = 0.5, min_child_weight = 2)
xgb.plot.deepness(bst)

View File

@@ -77,9 +77,14 @@ with bar colors corresponding to different clusters that have somewhat similar i
}
\examples{
data(agaricus.train)
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
bst <- xgboost(
data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
)
importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)

View File

@@ -63,10 +63,15 @@ This function is inspired by this blog post:
\examples{
data(agaricus.train, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
min_child_weight = 50, verbose = 0)
bst <- xgboost(
data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
min_child_weight = 50, verbose = 0
)
p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
print(p)

View File

@@ -124,9 +124,14 @@ a meaningful thing to do.
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
nrounds <- 20
bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
eta = 0.1, max_depth = 3, subsample = .5,
method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@@ -135,12 +140,11 @@ xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12) # S
# multiclass example - plots for each class separately:
nclass <- 3
nrounds <- 20
x <- as.matrix(iris[, -5])
set.seed(123)
is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
objective = "multi:softprob", num_class = nclass, verbose = 0)
trees0 <- seq(from=0, by=nclass, length.out=nrounds)
col <- rgb(0, 0, 1, 0.5)

View File

@@ -31,14 +31,22 @@ releases of XGBoost.
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2, eta = 1,
nthread = nthread,
nrounds = 2,
objective = "binary:logistic"
)
xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
if (file.exists('xgb.model')) file.remove('xgb.model')
pred <- predict(bst, test$data)
}
\seealso{
\code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.

View File

@@ -25,12 +25,17 @@ Save xgboost model from xgboost or xgb.train
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
raw <- xgb.save.raw(bst)
bst <- xgb.load.raw(raw)
pred <- predict(bst, test$data)
}

View File

@@ -250,7 +250,8 @@ customized objective and evaluation metric functions, therefore it is more flexi
than the \code{xgboost} interface.
Parallelization is automatically enabled if \code{OpenMP} is present.
Number of threads can also be manually specified via \code{nthread} parameter.
Number of threads can also be manually specified via the \code{nthread}
parameter.
The evaluation metric is chosen automatically by XGBoost (according to the objective)
when the \code{eval_metric} parameter is not provided.
@@ -286,17 +287,25 @@ The following callbacks are automatically created when certain parameters are se
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
dtrain <- with(
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
)
dtest <- with(
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
)
watchlist <- list(train = dtrain, eval = dtest)
## A simple xgb.train example:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
## An xgb.train example where custom objective and evaluation metric are used:
## An xgb.train example where custom objective and evaluation metric are
## used:
logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
preds <- 1/(1 + exp(-preds))
@@ -312,12 +321,12 @@ evalerror <- function(preds, dtrain) {
# These functions could be used by passing them either:
# as 'objective' and 'eval_metric' parameters in the params list:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
objective = logregobj, eval_metric = evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
# or through the ... arguments:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
objective = logregobj, eval_metric = evalerror)
@@ -327,7 +336,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
## An xgb.train example of using variable learning rates at each iteration:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
my_etas <- list(eta = c(0.5, 0.1))
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@@ -339,7 +348,7 @@ bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
## An 'xgboost' interface example:
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
objective = "binary:logistic")
pred <- predict(bst, agaricus.test$data)