[R] Set number of threads in demos and tests. (#9591)
- Restrict the number of threads in IO.
- Specify the number of threads in demos and tests.
- Add helper scripts for checks.
This commit is contained in:
@@ -35,14 +35,18 @@ Callback function expects the following values to be set in its calling frame:
|
||||
}
|
||||
\examples{
|
||||
#### Binary classification:
|
||||
#
|
||||
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
# In the iris dataset, it is hard to linearly separate Versicolor class from the rest
|
||||
# without considering the 2nd order interactions:
|
||||
x <- model.matrix(Species ~ .^2, iris)[,-1]
|
||||
colnames(x)
|
||||
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
|
||||
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
|
||||
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
|
||||
lambda = 0.0003, alpha = 0.0003, nthread = 2)
|
||||
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
|
||||
# For 'shotgun', which is a default linear updater, using high eta values may result in
|
||||
# unstable behaviour in some datasets. With this simple dataset, however, the high learning
|
||||
# rate does not break the convergence, but allows us to illustrate the typical pattern of
|
||||
@@ -72,9 +76,9 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
|
||||
|
||||
#### Multiclass classification:
|
||||
#
|
||||
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
|
||||
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
|
||||
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
|
||||
lambda = 0.0003, alpha = 0.0003, nthread = 1)
|
||||
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
|
||||
# For the default linear updater 'shotgun' it sometimes is helpful
|
||||
# to use smaller eta to reduce instability
|
||||
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
|
||||
|
||||
@@ -132,11 +132,16 @@ Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple th
|
||||
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
## Keep the number of threads to 2 for examples
|
||||
nthread <- 2
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
|
||||
eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
|
||||
# use all trees by default
|
||||
pred <- predict(bst, test$data)
|
||||
# use only the 1st tree
|
||||
|
||||
@@ -38,7 +38,12 @@ Supported input file formats are either a LIBSVM text file or a binary file that
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
dtrain <- with(
|
||||
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
|
||||
)
|
||||
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
|
||||
|
||||
@@ -19,10 +19,15 @@ Accessors for model parameters as JSON string.
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
train <- agaricus.train
|
||||
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
bst <- xgboost(
|
||||
data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
|
||||
)
|
||||
config <- xgb.config(bst)
|
||||
|
||||
}
|
||||
|
||||
@@ -27,14 +27,23 @@ not \code{xgb.load}.
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
bst <- xgboost(
|
||||
data = train$data, label = train$label, max_depth = 2, eta = 1,
|
||||
nthread = nthread,
|
||||
nrounds = 2,
|
||||
objective = "binary:logistic"
|
||||
)
|
||||
|
||||
xgb.save(bst, 'xgb.model')
|
||||
bst <- xgb.load('xgb.model')
|
||||
if (file.exists('xgb.model')) file.remove('xgb.model')
|
||||
pred <- predict(bst, test$data)
|
||||
}
|
||||
\seealso{
|
||||
\code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
|
||||
|
||||
@@ -66,9 +66,12 @@ Parse a boosted tree model text dump into a \code{data.table} structure.
|
||||
# Basic use:
|
||||
|
||||
data(agaricus.train, package='xgboost')
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
|
||||
|
||||
(dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
|
||||
|
||||
|
||||
@@ -61,10 +61,13 @@ This function was inspired by the blog post
|
||||
\examples{
|
||||
|
||||
data(agaricus.train, package='xgboost')
|
||||
## Keep the number of threads to 2 for examples
|
||||
nthread <- 2
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
# Change max_depth to a higher number to get a more significant result
|
||||
## Change max_depth to a higher number to get a more significant result
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
|
||||
eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
|
||||
eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
|
||||
subsample = 0.5, min_child_weight = 2)
|
||||
|
||||
xgb.plot.deepness(bst)
|
||||
|
||||
@@ -77,9 +77,14 @@ with bar colors corresponding to different clusters that have somewhat similar i
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train)
|
||||
## Keep the number of threads to 2 for examples
|
||||
nthread <- 2
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
|
||||
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
bst <- xgboost(
|
||||
data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
|
||||
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
|
||||
)
|
||||
|
||||
importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
|
||||
|
||||
|
||||
@@ -63,10 +63,15 @@ This function is inspired by this blog post:
|
||||
\examples{
|
||||
|
||||
data(agaricus.train, package='xgboost')
|
||||
## Keep the number of threads to 2 for examples
|
||||
nthread <- 2
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
|
||||
eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
|
||||
min_child_weight = 50, verbose = 0)
|
||||
bst <- xgboost(
|
||||
data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
|
||||
eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
|
||||
min_child_weight = 50, verbose = 0
|
||||
)
|
||||
|
||||
p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
|
||||
print(p)
|
||||
|
||||
@@ -124,9 +124,14 @@ a meaningful thing to do.
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
nrounds <- 20
|
||||
|
||||
bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
|
||||
eta = 0.1, max_depth = 3, subsample = .5,
|
||||
method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
|
||||
method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
|
||||
|
||||
xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
|
||||
contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
|
||||
@@ -135,12 +140,11 @@ xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12) # S
|
||||
|
||||
# multiclass example - plots for each class separately:
|
||||
nclass <- 3
|
||||
nrounds <- 20
|
||||
x <- as.matrix(iris[, -5])
|
||||
set.seed(123)
|
||||
is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
|
||||
mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
|
||||
max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
|
||||
max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
|
||||
objective = "multi:softprob", num_class = nclass, verbose = 0)
|
||||
trees0 <- seq(from=0, by=nclass, length.out=nrounds)
|
||||
col <- rgb(0, 0, 1, 0.5)
|
||||
|
||||
@@ -31,14 +31,22 @@ releases of XGBoost.
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
bst <- xgboost(
|
||||
data = train$data, label = train$label, max_depth = 2, eta = 1,
|
||||
nthread = nthread,
|
||||
nrounds = 2,
|
||||
objective = "binary:logistic"
|
||||
)
|
||||
xgb.save(bst, 'xgb.model')
|
||||
bst <- xgb.load('xgb.model')
|
||||
if (file.exists('xgb.model')) file.remove('xgb.model')
|
||||
pred <- predict(bst, test$data)
|
||||
}
|
||||
\seealso{
|
||||
\code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.
|
||||
|
||||
@@ -25,12 +25,17 @@ Save xgboost model from xgboost or xgb.train
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
## Keep the number of threads to 2 for examples
|
||||
nthread <- 2
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
|
||||
|
||||
raw <- xgb.save.raw(bst)
|
||||
bst <- xgb.load.raw(raw)
|
||||
pred <- predict(bst, test$data)
|
||||
|
||||
}
|
||||
|
||||
@@ -250,7 +250,8 @@ customized objective and evaluation metric functions, therefore it is more flexi
|
||||
than the \code{xgboost} interface.
|
||||
|
||||
Parallelization is automatically enabled if \code{OpenMP} is present.
|
||||
Number of threads can also be manually specified via \code{nthread} parameter.
|
||||
Number of threads can also be manually specified via the \code{nthread}
|
||||
parameter.
|
||||
|
||||
The evaluation metric is chosen automatically by XGBoost (according to the objective)
|
||||
when the \code{eval_metric} parameter is not provided.
|
||||
@@ -286,17 +287,25 @@ The following callbacks are automatically created when certain parameters are se
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
## Keep the number of threads to 1 for examples
|
||||
nthread <- 1
|
||||
data.table::setDTthreads(nthread)
|
||||
|
||||
dtrain <- with(
|
||||
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
|
||||
)
|
||||
dtest <- with(
|
||||
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
|
||||
)
|
||||
watchlist <- list(train = dtrain, eval = dtest)
|
||||
|
||||
## A simple xgb.train example:
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
|
||||
objective = "binary:logistic", eval_metric = "auc")
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
|
||||
|
||||
|
||||
## An xgb.train example where custom objective and evaluation metric are used:
|
||||
## An xgb.train example where custom objective and evaluation metric are
|
||||
## used:
|
||||
logregobj <- function(preds, dtrain) {
|
||||
labels <- getinfo(dtrain, "label")
|
||||
preds <- 1/(1 + exp(-preds))
|
||||
@@ -312,12 +321,12 @@ evalerror <- function(preds, dtrain) {
|
||||
|
||||
# These functions could be used by passing them either:
|
||||
# as 'objective' and 'eval_metric' parameters in the params list:
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
|
||||
|
||||
# or through the ... arguments:
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
|
||||
@@ -327,7 +336,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
|
||||
|
||||
## An xgb.train example of using variable learning rates at each iteration:
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
|
||||
objective = "binary:logistic", eval_metric = "auc")
|
||||
my_etas <- list(eta = c(0.5, 0.1))
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
@@ -339,7 +348,7 @@ bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
|
||||
|
||||
## An 'xgboost' interface example:
|
||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
|
||||
max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
|
||||
max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
|
||||
objective = "binary:logistic")
|
||||
pred <- predict(bst, agaricus.test$data)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user