[R] Set number of threads in demos and tests. (#9591)

- Restrict the number of threads in IO.
- Specify the number of threads in demos and tests.
- Add helper scripts for checks.
Jiaming Yuan 2023-09-23 21:44:03 +08:00 committed by GitHub
parent def77870f3
commit cac2cd2e94
51 changed files with 714 additions and 296 deletions
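Taken together, the hunks below apply one recurring pattern: pin a small thread count once at the top of each example or test, cap data.table's own thread pool with it, and pass it explicitly to every xgb.DMatrix / xgboost / xgb.train call instead of a hard-coded nthread = 2. A minimal, self-contained sketch of that pattern, assembled from the examples touched below:

library(xgboost)

## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)

data(agaricus.train, package = "xgboost")
dtrain <- with(
  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
)
bst <- xgboost(
  data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
  eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
)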

View File

@ -4,3 +4,5 @@
^.*\.Rproj$
^\.Rproj\.user$
README.md
^doc$
^Meta$

View File

@ -557,14 +557,18 @@ cb.cv.predict <- function(save_models = FALSE) {
#'
#' @examples
#' #### Binary classification:
#' #
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' # In the iris dataset, it is hard to linearly separate the Versicolor class from the rest
#' # without considering the 2nd order interactions:
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
#' colnames(x)
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
#' lambda = 0.0003, alpha = 0.0003, nthread = nthread)
#' # For 'shotgun', which is the default linear updater, using high eta values may result in
#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
@ -594,9 +598,9 @@ cb.cv.predict <- function(save_models = FALSE) {
#'
#' #### Multiclass classification:
#' #
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
#' lambda = 0.0003, alpha = 0.0003, nthread = 1)
#' lambda = 0.0003, alpha = 0.0003, nthread = nthread)
#' # For the default linear updater 'shotgun', it is sometimes helpful
#' # to use a smaller eta to reduce instability
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,

View File

@ -267,11 +267,16 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#'
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#'
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
#' eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
#' # use all trees by default
#' pred <- predict(bst, test$data)
#' # use only the 1st tree
@ -337,8 +342,14 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
object <- xgb.Booster.complete(object, saveraw = FALSE)
if (!inherits(newdata, "xgb.DMatrix"))
newdata <- xgb.DMatrix(newdata, missing = missing, nthread = NVL(object$params[["nthread"]], -1))
if (!inherits(newdata, "xgb.DMatrix")) {
config <- jsonlite::fromJSON(xgb.config(object))
nthread <- strtoi(config$learner$generic_param$nthread)
newdata <- xgb.DMatrix(
newdata,
missing = missing, nthread = NVL(nthread, -1)
)
}
if (!is.null(object[["feature_names"]]) &&
!is.null(colnames(newdata)) &&
!identical(object[["feature_names"]], colnames(newdata)))
@ -628,10 +639,15 @@ xgb.attributes <- function(object) {
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' bst <- xgboost(
#' data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
#' )
#' config <- xgb.config(bst)
#'
#' @rdname xgb.config
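For reference, the predict.xgb.Booster hunk above now recovers the booster's thread count from its JSON configuration (via xgb.config) rather than from the R-level params list; the diff additionally wraps the result in NVL(., -1) so a missing value falls back to letting XGBoost decide. A sketch of that lookup, with the training call borrowed from the example above:

library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgboost(
  data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
  eta = 1, nthread = 1, nrounds = 2, objective = "binary:logistic"
)
config <- jsonlite::fromJSON(xgb.config(bst))
## learner/generic_param/nthread is stored as a string in the JSON config
nthread <- strtoi(config$learner$generic_param$nthread)
dnew <- xgb.DMatrix(agaricus.train$data, nthread = nthread)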

View File

@ -18,7 +18,12 @@
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#' dtrain <- with(
#' agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')

View File

@ -22,14 +22,23 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' bst <- xgboost(
#' data = train$data, label = train$label, max_depth = 2, eta = 1,
#' nthread = nthread,
#' nrounds = 2,
#' objective = "binary:logistic"
#' )
#'
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' if (file.exists('xgb.model')) file.remove('xgb.model')
#' pred <- predict(bst, test$data)
#' @export
xgb.load <- function(modelfile) {
if (is.null(modelfile))

View File

@ -46,9 +46,12 @@
#' # Basic use:
#'
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic")
#'
#' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
#'

View File

@ -45,10 +45,13 @@
#' @examples
#'
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' # Change max_depth to a higher number to get a more significant result
#' ## Change max_depth to a higher number to get a more significant result
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
#' eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
#' eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
#' subsample = 0.5, min_child_weight = 2)
#'
#' xgb.plot.deepness(bst)

View File

@ -45,9 +45,14 @@
#'
#' @examples
#' data(agaricus.train)
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' bst <- xgboost(
#' data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
#' )
#'
#' importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
#'

View File

@ -43,10 +43,15 @@
#' @examples
#'
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
#' eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
#' min_child_weight = 50, verbose = 0)
#' bst <- xgboost(
#' data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
#' eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
#' min_child_weight = 50, verbose = 0
#' )
#'
#' p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
#' print(p)

View File

@ -74,9 +74,14 @@
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#' nrounds <- 20
#'
#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
#' eta = 0.1, max_depth = 3, subsample = .5,
#' method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
#' method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
#'
#' xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
#' contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@ -85,12 +90,11 @@
#'
#' # multiclass example - plots for each class separately:
#' nclass <- 3
#' nrounds <- 20
#' x <- as.matrix(iris[, -5])
#' set.seed(123)
#' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
#' mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
#' max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
#' max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
#' objective = "multi:softprob", num_class = nclass, verbose = 0)
#' trees0 <- seq(from=0, by=nclass, length.out=nrounds)
#' col <- rgb(0, 0, 1, 0.5)

View File

@ -25,14 +25,22 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' bst <- xgboost(
#' data = train$data, label = train$label, max_depth = 2, eta = 1,
#' nthread = nthread,
#' nrounds = 2,
#' objective = "binary:logistic"
#' )
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' if (file.exists('xgb.model')) file.remove('xgb.model')
#' pred <- predict(bst, test$data)
#' @export
xgb.save <- function(model, fname) {
if (typeof(fname) != "character")

View File

@ -16,13 +16,18 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic")
#'
#' raw <- xgb.save.raw(bst)
#' bst <- xgb.load.raw(raw)
#' pred <- predict(bst, test$data)
#'
#' @export
xgb.save.raw <- function(model, raw_format = "deprecated") {

View File

@ -168,7 +168,8 @@
#' than the \code{xgboost} interface.
#'
#' Parallelization is automatically enabled if \code{OpenMP} is present.
#' Number of threads can also be manually specified via \code{nthread} parameter.
#' Number of threads can also be manually specified via the \code{nthread}
#' parameter.
#'
#' The evaluation metric is chosen automatically by XGBoost (according to the objective)
#' when the \code{eval_metric} parameter is not provided.
@ -237,17 +238,25 @@
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' dtrain <- with(
#' agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' dtest <- with(
#' agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' watchlist <- list(train = dtrain, eval = dtest)
#'
#' ## A simple xgb.train example:
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
#'
#'
#' ## An xgb.train example where custom objective and evaluation metric are used:
#' ## An xgb.train example where custom objective and evaluation metric are
#' ## used:
#' logregobj <- function(preds, dtrain) {
#' labels <- getinfo(dtrain, "label")
#' preds <- 1/(1 + exp(-preds))
@ -263,12 +272,12 @@
#'
#' # These functions could be used by passing them either:
#' # as 'objective' and 'eval_metric' parameters in the params list:
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
#' objective = logregobj, eval_metric = evalerror)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
#'
#' # or through the ... arguments:
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
#' objective = logregobj, eval_metric = evalerror)
#'
@ -278,7 +287,7 @@
#'
#'
#' ## An xgb.train example of using variable learning rates at each iteration:
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' my_etas <- list(eta = c(0.5, 0.1))
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@ -290,7 +299,7 @@
#'
#' ## An 'xgboost' interface example:
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
#' max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
#' max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
#' objective = "binary:logistic")
#' pred <- predict(bst, agaricus.test$data)
#'

View File

@ -35,14 +35,18 @@ Callback function expects the following values to be set in its calling frame:
}
\examples{
#### Binary classification:
#
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
# In the iris dataset, it is hard to linearly separate the Versicolor class from the rest
# without considering the 2nd order interactions:
x <- model.matrix(Species ~ .^2, iris)[,-1]
colnames(x)
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
lambda = 0.0003, alpha = 0.0003, nthread = 2)
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
# For 'shotgun', which is the default linear updater, using high eta values may result in
# unstable behaviour in some datasets. With this simple dataset, however, the high learning
# rate does not break the convergence, but allows us to illustrate the typical pattern of
@ -72,9 +76,9 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
#### Multiclass classification:
#
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
lambda = 0.0003, alpha = 0.0003, nthread = 1)
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
# For the default linear updater 'shotgun', it is sometimes helpful
# to use a smaller eta to reduce instability
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,

View File

@ -132,11 +132,16 @@ Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple th
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
# use all trees by default
pred <- predict(bst, test$data)
# use only the 1st tree

View File

@ -38,7 +38,12 @@ Supported input file formats are either a LIBSVM text file or a binary file that
}
\examples{
data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
dtrain <- with(
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
)
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')

View File

@ -19,10 +19,15 @@ Accessors for model parameters as JSON string.
}
\examples{
data(agaricus.train, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
)
config <- xgb.config(bst)
}

View File

@ -27,14 +27,23 @@ not \code{xgb.load}.
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2, eta = 1,
nthread = nthread,
nrounds = 2,
objective = "binary:logistic"
)
xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
if (file.exists('xgb.model')) file.remove('xgb.model')
pred <- predict(bst, test$data)
}
\seealso{
\code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.

View File

@ -66,9 +66,12 @@ Parse a boosted tree model text dump into a \code{data.table} structure.
# Basic use:
data(agaricus.train, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic")
(dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))

View File

@ -61,10 +61,13 @@ This function was inspired by the blog post
\examples{
data(agaricus.train, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
# Change max_depth to a higher number to get a more significant result
## Change max_depth to a higher number to get a more significant result
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
subsample = 0.5, min_child_weight = 2)
xgb.plot.deepness(bst)

View File

@ -77,9 +77,14 @@ with bar colors corresponding to different clusters that have somewhat similar i
}
\examples{
data(agaricus.train)
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
bst <- xgboost(
data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
)
importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)

View File

@ -63,10 +63,15 @@ This function is inspired by this blog post:
\examples{
data(agaricus.train, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
min_child_weight = 50, verbose = 0)
bst <- xgboost(
data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
min_child_weight = 50, verbose = 0
)
p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
print(p)

View File

@ -124,9 +124,14 @@ a meaningful thing to do.
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
nrounds <- 20
bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
eta = 0.1, max_depth = 3, subsample = .5,
method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@ -135,12 +140,11 @@ xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12) # S
# multiclass example - plots for each class separately:
nclass <- 3
nrounds <- 20
x <- as.matrix(iris[, -5])
set.seed(123)
is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
objective = "multi:softprob", num_class = nclass, verbose = 0)
trees0 <- seq(from=0, by=nclass, length.out=nrounds)
col <- rgb(0, 0, 1, 0.5)

View File

@ -31,14 +31,22 @@ releases of XGBoost.
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2, eta = 1,
nthread = nthread,
nrounds = 2,
objective = "binary:logistic"
)
xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
if (file.exists('xgb.model')) file.remove('xgb.model')
pred <- predict(bst, test$data)
}
\seealso{
\code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.

View File

@ -25,12 +25,17 @@ Save xgboost model from xgboost or xgb.train
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic")
raw <- xgb.save.raw(bst)
bst <- xgb.load.raw(raw)
pred <- predict(bst, test$data)
}

View File

@ -250,7 +250,8 @@ customized objective and evaluation metric functions, therefore it is more flexi
than the \code{xgboost} interface.
Parallelization is automatically enabled if \code{OpenMP} is present.
Number of threads can also be manually specified via \code{nthread} parameter.
Number of threads can also be manually specified via the \code{nthread}
parameter.
The evaluation metric is chosen automatically by XGBoost (according to the objective)
when the \code{eval_metric} parameter is not provided.
@ -286,17 +287,25 @@ The following callbacks are automatically created when certain parameters are se
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
dtrain <- with(
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
)
dtest <- with(
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
)
watchlist <- list(train = dtrain, eval = dtest)
## A simple xgb.train example:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
## An xgb.train example where custom objective and evaluation metric are used:
## An xgb.train example where custom objective and evaluation metric are
## used:
logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
preds <- 1/(1 + exp(-preds))
@ -312,12 +321,12 @@ evalerror <- function(preds, dtrain) {
# These functions could be used by passing them either:
# as 'objective' and 'eval_metric' parameters in the params list:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
objective = logregobj, eval_metric = evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
# or through the ... arguments:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
objective = logregobj, eval_metric = evalerror)
@ -327,7 +336,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
## An xgb.train example of using variable learning rates at each iteration:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
my_etas <- list(eta = c(0.5, 0.1))
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@ -339,7 +348,7 @@ bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
## An 'xgboost' interface example:
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
objective = "binary:logistic")
pred <- predict(bst, agaricus.test$data)

View File

@ -0,0 +1,25 @@
## Helper script for running individual examples.
library(pkgload)
library(xgboost)
files <- list.files("./man")
run_example_timeit <- function(f) {
path <- paste("./man/", f, sep = "")
print(paste("Test", f))
flush.console()
t0 <- proc.time()
run_example(path)
t1 <- proc.time()
list(file = f, time = t1 - t0)
}
timings <- lapply(files, run_example_timeit)
for (t in timings) {
# user CPU time over elapsed (wall) time approximates the busy-core count
ratio <- t$time[1] / t$time[3]
if (!is.na(ratio) && !is.infinite(ratio) && ratio >= 2.5) {
print(paste("Offending example:", t$file, ratio))
}
}
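The ratio used above is a rough estimate of the number of busy cores: user CPU time divided by elapsed (wall) time stays near 1 for single-threaded code and grows with parallelism, so the 2.5 threshold flags examples that quietly went multi-threaded. The heuristic in isolation:

t0 <- proc.time()
x <- sum(sqrt(abs(rnorm(1e6)))) # some CPU-bound, single-threaded work
t1 <- proc.time()
dt <- t1 - t0
dt[["user.self"]] / dt[["elapsed"]] # ~1 here; much larger when multi-threaded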

View File

@ -1,7 +1,7 @@
context("basic functions")
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
train <- agaricus.train
test <- agaricus.test
set.seed(1994)
@ -9,15 +9,20 @@ set.seed(1994)
# disable some tests for Win32
windows_flag <- .Platform$OS.type == "windows" &&
.Machine$sizeof.pointer != 8
solaris_flag <- (Sys.info()['sysname'] == "SunOS")
solaris_flag <- (Sys.info()["sysname"] == "SunOS")
n_threads <- 1
test_that("train and predict binary classification", {
nrounds <- 2
expect_output(
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic",
eval_metric = "error")
, "train-error")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = nrounds,
objective = "binary:logistic", eval_metric = "error"
),
"train-error"
)
expect_equal(class(bst), "xgb.Booster")
expect_equal(bst$niter, nrounds)
expect_false(is.null(bst$evaluation_log))
@ -46,26 +51,39 @@ test_that("parameter validation works", {
d <- cbind(
x1 = rnorm(10),
x2 = rnorm(10),
x3 = rnorm(10))
x3 = rnorm(10)
)
y <- d[, "x1"] + d[, "x2"]^2 +
ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
rnorm(10)
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
correct <- function() {
params <- list(max_depth = 2, booster = "dart",
rate_drop = 0.5, one_drop = TRUE,
objective = "reg:squarederror")
params <- list(
max_depth = 2,
booster = "dart",
rate_drop = 0.5,
one_drop = TRUE,
nthread = n_threads,
objective = "reg:squarederror"
)
xgb.train(params = params, data = dtrain, nrounds = nrounds)
}
expect_silent(correct())
incorrect <- function() {
params <- list(max_depth = 2, booster = "dart",
rate_drop = 0.5, one_drop = TRUE,
params <- list(
max_depth = 2,
booster = "dart",
rate_drop = 0.5,
one_drop = TRUE,
objective = "reg:squarederror",
foo = "bar", bar = "foo")
nthread = n_threads,
foo = "bar",
bar = "foo"
)
output <- capture.output(
xgb.train(params = params, data = dtrain, nrounds = nrounds))
xgb.train(params = params, data = dtrain, nrounds = nrounds)
)
print(output)
}
expect_output(incorrect(), '\\\\"bar\\\\", \\\\"foo\\\\"')
@ -79,7 +97,8 @@ test_that("dart prediction works", {
d <- cbind(
x1 = rnorm(100),
x2 = rnorm(100),
x3 = rnorm(100))
x3 = rnorm(100)
)
y <- d[, "x1"] + d[, "x2"]^2 +
ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
rnorm(100)
@ -93,7 +112,7 @@ test_that("dart prediction works", {
rate_drop = 0.5,
one_drop = TRUE,
eta = 1,
nthread = 2,
nthread = n_threads,
nrounds = nrounds,
objective = "reg:squarederror"
)
@ -105,7 +124,7 @@ test_that("dart prediction works", {
expect_false(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))
set.seed(1994)
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
booster_by_train <- xgb.train(
params = list(
booster = "dart",
@ -113,7 +132,7 @@ test_that("dart prediction works", {
eta = 1,
rate_drop = 0.5,
one_drop = TRUE,
nthread = 1,
nthread = n_threads,
objective = "reg:squarederror"
),
data = dtrain,
@ -132,10 +151,13 @@ test_that("train and predict softprob", {
lb <- as.numeric(iris$Species) - 1
set.seed(11)
expect_output(
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
objective = "multi:softprob", num_class = 3, eval_metric = "merror")
, "train-merror")
bst <- xgboost(
data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softprob", num_class = 3, eval_metric = "merror"
),
"train-merror"
)
expect_false(is.null(bst$evaluation_log))
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
expect_equal(bst$niter * 3, xgb.ntree(bst))
@ -164,9 +186,10 @@ test_that("train and predict softprob", {
x3 = rnorm(100)
)
y <- sample.int(10, 100, replace = TRUE) - 1
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
booster <- xgb.train(
params = list(tree_method = "hist"), data = dtrain, nrounds = 4, num_class = 10,
params = list(tree_method = "hist", nthread = n_threads),
data = dtrain, nrounds = 4, num_class = 10,
objective = "multi:softprob"
)
predt <- predict(booster, as.matrix(d), reshape = TRUE, strict_shape = FALSE)
@ -178,10 +201,13 @@ test_that("train and predict softmax", {
lb <- as.numeric(iris$Species) - 1
set.seed(11)
expect_output(
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
objective = "multi:softmax", num_class = 3, eval_metric = "merror")
, "train-merror")
bst <- xgboost(
data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softmax", num_class = 3, eval_metric = "merror"
),
"train-merror"
)
expect_false(is.null(bst$evaluation_log))
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
expect_equal(bst$niter * 3, xgb.ntree(bst))
@ -196,9 +222,12 @@ test_that("train and predict RF", {
set.seed(11)
lb <- train$label
# single iteration
bst <- xgboost(data = train$data, label = lb, max_depth = 5,
nthread = 2, nrounds = 1, objective = "binary:logistic", eval_metric = "error",
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
bst <- xgboost(
data = train$data, label = lb, max_depth = 5,
nthread = n_threads,
nrounds = 1, objective = "binary:logistic", eval_metric = "error",
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1
)
expect_equal(bst$niter, 1)
expect_equal(xgb.ntree(bst), 20)
@ -219,11 +248,13 @@ test_that("train and predict RF with softprob", {
lb <- as.numeric(iris$Species) - 1
nrounds <- 15
set.seed(11)
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds,
bst <- xgboost(
data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.9, nthread = n_threads, nrounds = nrounds,
objective = "multi:softprob", eval_metric = "merror",
num_class = 3, verbose = 0,
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5)
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5
)
expect_equal(bst$niter, 15)
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
# predict for all iterations:
@ -240,18 +271,24 @@ test_that("train and predict RF with softprob", {
test_that("use of multiple eval metrics works", {
expect_output(
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
, "train-error.*train-auc.*train-logloss")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
),
"train-error.*train-auc.*train-logloss"
)
expect_false(is.null(bst$evaluation_log))
expect_equal(dim(bst$evaluation_log), c(2, 4))
expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
expect_output(
bst2 <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
eval_metric = list("error", "auc", "logloss"))
, "train-error.*train-auc.*train-logloss")
bst2 <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = list("error", "auc", "logloss")
),
"train-error.*train-auc.*train-logloss"
)
expect_false(is.null(bst2$evaluation_log))
expect_equal(dim(bst2$evaluation_log), c(2, 4))
expect_equal(colnames(bst2$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
@ -259,9 +296,11 @@ test_that("use of multiple eval metrics works", {
test_that("training continuation works", {
dtrain <- xgb.DMatrix(train$data, label = train$label)
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
param <- list(
objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
)
# for the reference, use 4 iterations at once:
set.seed(11)
@ -271,30 +310,33 @@ test_that("training continuation works", {
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
# continue for two more:
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
if (!windows_flag && !solaris_flag)
if (!windows_flag && !solaris_flag) {
expect_equal(bst$raw, bst2$raw)
}
expect_false(is.null(bst2$evaluation_log))
expect_equal(dim(bst2$evaluation_log), c(4, 2))
expect_equal(bst2$evaluation_log, bst$evaluation_log)
# test continuing from raw model data
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
if (!windows_flag && !solaris_flag)
if (!windows_flag && !solaris_flag) {
expect_equal(bst$raw, bst2$raw)
}
expect_equal(dim(bst2$evaluation_log), c(2, 2))
# test continuing from a model in file
xgb.save(bst1, "xgboost.json")
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.json")
if (!windows_flag && !solaris_flag)
if (!windows_flag && !solaris_flag) {
expect_equal(bst$raw, bst2$raw)
}
expect_equal(dim(bst2$evaluation_log), c(2, 2))
file.remove("xgboost.json")
})
test_that("model serialization works", {
out_path <- "model_serialization"
dtrain <- xgb.DMatrix(train$data, label = train$label)
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic")
param <- list(objective = "binary:logistic", nthread = n_threads)
booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
raw <- xgb.serialize(booster)
saveRDS(raw, out_path)
@ -309,11 +351,14 @@ test_that("model serialization works", {
test_that("xgb.cv works", {
set.seed(11)
expect_output(
cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5,
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", verbose = TRUE)
, "train-error:")
expect_is(cv, 'xgb.cv.synchronous')
cv <- xgb.cv(
data = train$data, label = train$label, max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", verbose = TRUE
),
"train-error:"
)
expect_is(cv, "xgb.cv.synchronous")
expect_false(is.null(cv$evaluation_log))
expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)
expect_lt(cv$evaluation_log[, min(test_error_std)], 0.008)
@ -326,15 +371,19 @@ test_that("xgb.cv works", {
})
test_that("xgb.cv works with stratified folds", {
dtrain <- xgb.DMatrix(train$data, label = train$label)
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
set.seed(314159)
cv <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
verbose = TRUE, stratified = FALSE)
cv <- xgb.cv(
data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = TRUE, stratified = FALSE
)
set.seed(314159)
cv2 <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
verbose = TRUE, stratified = TRUE)
cv2 <- xgb.cv(
data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = TRUE, stratified = TRUE
)
# Stratified folds should result in different evaluation logs
expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
})
@ -342,40 +391,57 @@ test_that("xgb.cv works with stratified folds", {
test_that("train and predict with non-strict classes", {
# standard dense matrix input
train_dense <- as.matrix(train$data)
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
bst <- xgboost(
data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = 0
)
pr0 <- predict(bst, train_dense)
# dense matrix-like input of non-matrix class
class(train_dense) <- 'shmatrix'
class(train_dense) <- "shmatrix"
expect_true(is.matrix(train_dense))
expect_error(
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
, regexp = NA)
bst <- xgboost(
data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = 0
),
regexp = NA
)
expect_error(pr <- predict(bst, train_dense), regexp = NA)
expect_equal(pr0, pr)
# dense matrix-like input of non-matrix class with some inheritance
class(train_dense) <- c('pphmatrix', 'shmatrix')
class(train_dense) <- c("pphmatrix", "shmatrix")
expect_true(is.matrix(train_dense))
expect_error(
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
, regexp = NA)
bst <- xgboost(
data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = 0
),
regexp = NA
)
expect_error(pr <- predict(bst, train_dense), regexp = NA)
expect_equal(pr0, pr)
# when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster
class(bst) <- c('super.Booster', 'xgb.Booster')
class(bst) <- c("super.Booster", "xgb.Booster")
expect_error(pr <- predict(bst, train_dense), regexp = NA)
expect_equal(pr0, pr)
})
test_that("max_delta_step works", {
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
param <- list(
objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
nthread = n_threads,
eta = 0.5
)
nrounds <- 5
# model with no restriction on max_delta_step
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
@ -395,14 +461,16 @@ test_that("colsample_bytree works", {
test_y <- as.numeric(rowSums(test_x) > 0)
colnames(train_x) <- paste0("Feature_", sprintf("%03d", 1:100))
colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
dtrain <- xgb.DMatrix(train_x, label = train_y)
dtest <- xgb.DMatrix(test_x, label = test_y)
dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
watchlist <- list(train = dtrain, eval = dtest)
## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
## each tree
param <- list(max_depth = 2, eta = 0, nthread = 2,
param <- list(
max_depth = 2, eta = 0, nthread = n_threads,
colsample_bytree = 0.01, objective = "binary:logistic",
eval_metric = "auc")
eval_metric = "auc"
)
set.seed(2)
bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
xgb.importance(model = bst)
@ -412,9 +480,11 @@ test_that("colsample_bytree works", {
})
test_that("Configuration works", {
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
)
config <- xgb.config(bst)
xgb.config(bst) <- config
reloaded_config <- xgb.config(bst)
@ -451,22 +521,26 @@ test_that("strict_shape works", {
y <- as.numeric(iris$Species) - 1
X <- as.matrix(iris[, -5])
bst <- xgboost(data = X, label = y,
max_depth = 2, nrounds = n_rounds,
objective = "multi:softprob", num_class = 3, eval_metric = "merror")
bst <- xgboost(
data = X, label = y,
max_depth = 2, nrounds = n_rounds, nthread = n_threads,
objective = "multi:softprob", num_class = 3, eval_metric = "merror"
)
test_strict_shape(bst, X, 3)
}
test_agaricus <- function() {
data(agaricus.train, package = 'xgboost')
data(agaricus.train, package = "xgboost")
X <- agaricus.train$data
y <- agaricus.train$label
bst <- xgboost(data = X, label = y, max_depth = 2,
bst <- xgboost(
data = X, label = y, max_depth = 2, nthread = n_threads,
nrounds = n_rounds, objective = "binary:logistic",
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
)
test_strict_shape(bst, X, 1)
}
@ -481,8 +555,10 @@ test_that("'predict' accepts CSR data", {
x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix")
x_csr <- as(x_csc, "RsparseMatrix")
x_spv <- as(x_csc, "sparseVector")
bst <- xgboost(data = X, label = y, objective = "binary:logistic",
nrounds = 5L, verbose = FALSE)
bst <- xgboost(
data = X, label = y, objective = "binary:logistic",
nrounds = 5L, verbose = FALSE, nthread = n_threads
)
p_csc <- predict(bst, x_csc)
p_csr <- predict(bst, x_csr)
p_spv <- predict(bst, x_spv)

View File

@ -6,6 +6,8 @@ data(agaricus.test, package = 'xgboost')
train <- agaricus.train
test <- agaricus.test
n_threads <- 2
# add some label noise for early stopping tests
add.noise <- function(label, frac) {
inoise <- sample(length(label), length(label) * frac)
@ -15,15 +17,15 @@ add.noise <- function(label, frac) {
set.seed(11)
ltrain <- add.noise(train$label, 0.2)
ltest <- add.noise(test$label, 0.2)
dtrain <- xgb.DMatrix(train$data, label = ltrain)
dtest <- xgb.DMatrix(test$data, label = ltest)
dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
watchlist <- list(train = dtrain, test = dtest)
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
param <- list(objective = "binary:logistic", eval_metric = "error",
max_depth = 2, nthread = 2)
max_depth = 2, nthread = n_threads)
test_that("cb.print.evaluation works as expected", {
@ -103,7 +105,7 @@ test_that("cb.evaluation.log works as expected", {
param <- list(objective = "binary:logistic", eval_metric = "error",
max_depth = 4, nthread = 2)
max_depth = 4, nthread = n_threads)
test_that("can store evaluation_log without printing", {
expect_silent(
@ -179,8 +181,10 @@ test_that("cb.save.model works as expected", {
expect_true(file.exists('xgboost_01.json'))
expect_true(file.exists('xgboost_02.json'))
b1 <- xgb.load('xgboost_01.json')
xgb.parameters(b1) <- list(nthread = 2)
expect_equal(xgb.ntree(b1), 1)
b2 <- xgb.load('xgboost_02.json')
xgb.parameters(b2) <- list(nthread = 2)
expect_equal(xgb.ntree(b2), 2)
xgb.config(b2) <- xgb.config(bst)
@ -267,7 +271,8 @@ test_that("early stopping works with titanic", {
objective = "binary:logistic",
eval_metric = "auc",
nrounds = 100,
early_stopping_rounds = 3
early_stopping_rounds = 3,
nthread = n_threads
)
expect_true(TRUE) # should not crash
@ -308,7 +313,7 @@ test_that("prediction in xgb.cv works", {
test_that("prediction in xgb.cv works for gblinear too", {
set.seed(11)
p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = 2)
p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads)
cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0)
expect_false(is.null(cv$evaluation_log))
expect_false(is.null(cv$pred))
@ -341,7 +346,7 @@ test_that("prediction in xgb.cv for softprob works", {
set.seed(11)
expect_warning(
cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4,
eta = 0.5, nrounds = 5, max_depth = 3, nthread = 2,
eta = 0.5, nrounds = 5, max_depth = 3, nthread = n_threads,
subsample = 0.8, gamma = 2, verbose = 0,
prediction = TRUE, objective = "multi:softprob", num_class = 3)
, NA)

View File

@ -2,10 +2,16 @@ context('Test models with custom objective')
set.seed(1994)
n_threads <- 2
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
watchlist <- list(eval = dtest, train = dtrain)
logregobj <- function(preds, dtrain) {
@ -22,7 +28,7 @@ evalerror <- function(preds, dtrain) {
return(list(metric = "error", value = err))
}
param <- list(max_depth = 2, eta = 1, nthread = 2,
param <- list(max_depth = 2, eta = 1, nthread = n_threads,
objective = logregobj, eval_metric = evalerror)
num_round <- 2
@ -67,7 +73,7 @@ test_that("custom objective using DMatrix attr works", {
test_that("custom objective with multi-class shape", {
data <- as.matrix(iris[, -5])
label <- as.numeric(iris$Species) - 1
dtrain <- xgb.DMatrix(data = data, label = label)
dtrain <- xgb.DMatrix(data = data, label = label, nthread = n_threads)
n_classes <- 3
fake_softprob <- function(preds, dtrain) {

View File

@ -5,19 +5,21 @@ data(agaricus.test, package = "xgboost")
test_data <- agaricus.test$data[1:100, ]
test_label <- agaricus.test$label[1:100]
n_threads <- 2
test_that("xgb.DMatrix: basic construction", {
# from sparse matrix
dtest1 <- xgb.DMatrix(test_data, label = test_label)
dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
# from dense matrix
dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label)
dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label, nthread = n_threads)
expect_equal(getinfo(dtest1, "label"), getinfo(dtest2, "label"))
expect_equal(dim(dtest1), dim(dtest2))
# from dense integer matrix
int_data <- as.matrix(test_data)
storage.mode(int_data) <- "integer"
dtest3 <- xgb.DMatrix(int_data, label = test_label)
dtest3 <- xgb.DMatrix(int_data, label = test_label, nthread = n_threads)
expect_equal(dim(dtest1), dim(dtest3))
n_samples <- 100
@ -29,15 +31,15 @@ test_that("xgb.DMatrix: basic construction", {
X <- matrix(X, nrow = n_samples)
y <- rbinom(n = n_samples, size = 1, prob = 1 / 2)
fd <- xgb.DMatrix(X, label = y, missing = 1)
fd <- xgb.DMatrix(X, label = y, missing = 1, nthread = n_threads)
dgc <- as(X, "dgCMatrix")
fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0)
fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0, nthread = n_threads)
dgr <- as(X, "dgRMatrix")
fdgr <- xgb.DMatrix(dgr, label = y, missing = 1)
fdgr <- xgb.DMatrix(dgr, label = y, missing = 1, nthread = n_threads)
params <- list(tree_method = "hist")
params <- list(tree_method = "hist", nthread = n_threads)
bst_fd <- xgb.train(
params, nrounds = 8, fd, watchlist = list(train = fd)
)
@ -64,12 +66,12 @@ test_that("xgb.DMatrix: NA", {
)
x[1, "x1"] <- NA
m <- xgb.DMatrix(x)
m <- xgb.DMatrix(x, nthread = n_threads)
xgb.DMatrix.save(m, "int.dmatrix")
x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
colnames(x) <- c("x1", "x2")
m <- xgb.DMatrix(x)
m <- xgb.DMatrix(x, nthread = n_threads)
xgb.DMatrix.save(m, "float.dmatrix")
@ -94,7 +96,7 @@ test_that("xgb.DMatrix: NA", {
test_that("xgb.DMatrix: saving, loading", {
# save to a local file
dtest1 <- xgb.DMatrix(test_data, label = test_label)
dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
tmp_file <- tempfile('xgb.DMatrix_')
on.exit(unlink(tmp_file))
expect_true(xgb.DMatrix.save(dtest1, tmp_file))
@ -109,13 +111,17 @@ test_that("xgb.DMatrix: saving, loading", {
tmp_file <- tempfile(fileext = ".libsvm")
writeLines(tmp, tmp_file)
expect_true(file.exists(tmp_file))
dtest4 <- xgb.DMatrix(paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE)
dtest4 <- xgb.DMatrix(
paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE, nthread = n_threads
)
expect_equal(dim(dtest4), c(3, 4))
expect_equal(getinfo(dtest4, 'label'), c(0, 1, 0))
# check that feature info is saved
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
dtrain <- xgb.DMatrix(
data = agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
cnames <- colnames(dtrain)
expect_equal(length(cnames), 126)
tmp_file <- tempfile('xgb.DMatrix_')
@ -129,7 +135,7 @@ test_that("xgb.DMatrix: saving, loading", {
})
test_that("xgb.DMatrix: getinfo & setinfo", {
dtest <- xgb.DMatrix(test_data)
dtest <- xgb.DMatrix(test_data, nthread = n_threads)
expect_true(setinfo(dtest, 'label', test_label))
labels <- getinfo(dtest, 'label')
expect_equal(test_label, getinfo(dtest, 'label'))
@ -156,7 +162,7 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
})
test_that("xgb.DMatrix: slice, dim", {
dtest <- xgb.DMatrix(test_data, label = test_label)
dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
expect_equal(dim(dtest), dim(test_data))
dsub1 <- slice(dtest, 1:42)
expect_equal(nrow(dsub1), 42)
@ -171,16 +177,20 @@ test_that("xgb.DMatrix: slice, trailing empty rows", {
data(agaricus.train, package = 'xgboost')
train_data <- agaricus.train$data
train_label <- agaricus.train$label
dtrain <- xgb.DMatrix(data = train_data, label = train_label)
dtrain <- xgb.DMatrix(
data = train_data, label = train_label, nthread = n_threads
)
slice(dtrain, 6513L)
train_data[6513, ] <- 0
dtrain <- xgb.DMatrix(data = train_data, label = train_label)
dtrain <- xgb.DMatrix(
data = train_data, label = train_label, nthread = n_threads
)
slice(dtrain, 6513L)
expect_equal(nrow(dtrain), 6513)
})
test_that("xgb.DMatrix: colnames", {
dtest <- xgb.DMatrix(test_data, label = test_label)
dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
expect_equal(colnames(dtest), colnames(test_data))
expect_error(colnames(dtest) <- 'asdf')
new_names <- make.names(seq_len(ncol(test_data)))
@ -196,7 +206,7 @@ test_that("xgb.DMatrix: nrow is correct for a very sparse matrix", {
x <- Matrix::rsparsematrix(nr, 100, density = 0.0005)
# we want it very sparse, so that last rows are empty
expect_lt(max(x@i), nr)
dtest <- xgb.DMatrix(x)
dtest <- xgb.DMatrix(x, nthread = n_threads)
expect_equal(dim(dtest), dim(x))
})
@ -205,8 +215,8 @@ test_that("xgb.DMatrix: print", {
# core DMatrix with just data and labels
dtrain <- xgb.DMatrix(
data = agaricus.train$data
, label = agaricus.train$label
data = agaricus.train$data, label = agaricus.train$label,
nthread = n_threads
)
txt <- capture.output({
print(dtrain)
@ -222,10 +232,11 @@ test_that("xgb.DMatrix: print", {
# DMatrix with weights and base_margin
dtrain <- xgb.DMatrix(
data = agaricus.train$data
, label = agaricus.train$label
, weight = seq_along(agaricus.train$label)
, base_margin = agaricus.train$label
data = agaricus.train$data,
label = agaricus.train$label,
weight = seq_along(agaricus.train$label),
base_margin = agaricus.train$label,
nthread = n_threads
)
txt <- capture.output({
print(dtrain)
@ -234,7 +245,8 @@ test_that("xgb.DMatrix: print", {
# DMatrix with just features
dtrain <- xgb.DMatrix(
data = agaricus.train$data
data = agaricus.train$data,
nthread = n_threads
)
txt <- capture.output({
print(dtrain)
@ -245,7 +257,8 @@ test_that("xgb.DMatrix: print", {
data_no_colnames <- agaricus.train$data
colnames(data_no_colnames) <- NULL
dtrain <- xgb.DMatrix(
data = data_no_colnames
data = data_no_colnames,
nthread = n_threads
)
txt <- capture.output({
print(dtrain)

View File

@ -1,5 +1,7 @@
context("feature weights")
n_threads <- 2
test_that("training with feature weights works", {
nrows <- 1000
ncols <- 9
@ -10,8 +12,12 @@ test_that("training with feature weights works", {
test <- function(tm) {
names <- paste0("f", 1:ncols)
xy <- xgb.DMatrix(data = x, label = y, feature_weights = weights)
params <- list(colsample_bynode = 0.4, tree_method = tm, nthread = 1)
xy <- xgb.DMatrix(
data = x, label = y, feature_weights = weights, nthread = n_threads
)
params <- list(
colsample_bynode = 0.4, tree_method = tm, nthread = n_threads
)
model <- xgb.train(params = params, data = xy, nrounds = 32)
importance <- xgb.importance(model = model, feature_names = names)
expect_equal(dim(importance), c(ncols, 4))

View File

@ -1,13 +1,19 @@
context('Test generalized linear models')
n_threads <- 2
test_that("gblinear works", {
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
watchlist <- list(eval = dtest, train = dtrain)
n <- 5 # iterations
@ -48,12 +54,16 @@ test_that("gblinear works", {
test_that("gblinear early stopping works", {
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
param <- list(
objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
updater = "coord_descent"
)

View File

@ -171,6 +171,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
fit <- xgboost(
params = c(
list(
nthread = 2,
booster = booster,
objective = "reg:squarederror",
eval_metric = "rmse"),
@ -257,7 +258,7 @@ test_that("xgb.Booster serializing as R object works", {
.skip_if_vcd_not_available()
saveRDS(bst.Tree, 'xgb.model.rds')
bst <- readRDS('xgb.model.rds')
dtrain <- xgb.DMatrix(sparse_matrix, label = label)
dtrain <- xgb.DMatrix(sparse_matrix, label = label, nthread = 2)
expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
xgb.save(bst, 'xgb.model')
@ -363,7 +364,8 @@ test_that("xgb.importance works with and without feature names", {
data = as.matrix(data.frame(x = c(0, 1))),
label = c(1, 2),
nrounds = 1,
base_score = 0.5
base_score = 0.5,
nthread = 2
)
df <- xgb.model.dt.tree(model = m)
expect_equal(df$Feature, "Leaf")

View File

@ -2,6 +2,8 @@ require(xgboost)
context("interaction constraints")
n_threads <- 2
set.seed(1024)
x1 <- rnorm(1000, 1)
x2 <- rnorm(1000, 1)
@ -45,11 +47,18 @@ test_that("interaction constraints scientific representation", {
d <- matrix(rexp(rows, rate = .1), nrow = rows, ncol = cols)
y <- rnorm(rows)
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
inc <- list(c(seq.int(from = 0, to = cols, by = 1)))
with_inc <- xgb.train(data = dtrain, tree_method = 'hist',
interaction_constraints = inc, nrounds = 10)
without_inc <- xgb.train(data = dtrain, tree_method = 'hist', nrounds = 10)
with_inc <- xgb.train(
data = dtrain,
tree_method = 'hist',
interaction_constraints = inc,
nrounds = 10,
nthread = n_threads
)
without_inc <- xgb.train(
data = dtrain, tree_method = 'hist', nrounds = 10, nthread = n_threads
)
expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))
})

View File

@ -1,6 +1,7 @@
context('Test prediction of feature interactions')
set.seed(123)
n_threads <- 2
test_that("predict feature interactions works", {
# simulate some binary data and a linear outcome with an interaction term
@ -19,8 +20,10 @@ test_that("predict feature interactions works", {
y <- f_int(X)
dm <- xgb.DMatrix(X, label = y)
param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
dm <- xgb.DMatrix(X, label = y, nthread = n_threads)
param <- list(
eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = n_threads
)
b <- xgb.train(param, dm, 100)
pred <- predict(b, dm, outputmargin = TRUE)
@ -99,11 +102,13 @@ test_that("SHAP contribution values are not NAN", {
verbose = 0,
params = list(
objective = "reg:squarederror",
eval_metric = "rmse"),
eval_metric = "rmse",
nthread = n_threads
),
data = as.matrix(subset(d, fold == 2)[, ivs]),
label = subset(d, fold == 2)$y,
nthread = 1,
nrounds = 3)
nrounds = 3
)
shaps <- as.data.frame(predict(fit,
newdata = as.matrix(subset(d, fold == 1)[, ivs]),
@ -116,8 +121,12 @@ test_that("SHAP contribution values are not NAN", {
test_that("multiclass feature interactions work", {
dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
dm <- xgb.DMatrix(
as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
)
param <- list(
eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3, nthread = n_threads
)
b <- xgb.train(param, dm, 40)
pred <- t(
array(
@ -166,6 +175,7 @@ test_that("SHAP single sample works", {
max_depth = 2,
nrounds = 4,
objective = "binary:logistic",
nthread = n_threads
)
predt <- predict(

View File

@ -9,7 +9,8 @@ test_that("load/save raw works", {
nrounds <- 8
booster <- xgboost(
data = train$data, label = train$label,
nrounds = nrounds, objective = "binary:logistic"
nrounds = nrounds, objective = "binary:logistic",
nthread = 2
)
json_bytes <- xgb.save.raw(booster, raw_format = "json")

View File

@ -66,7 +66,7 @@ test_that("Models from previous versions of XGBoost can be loaded", {
unzip(zipfile, exdir = extract_dir, overwrite = TRUE)
model_dir <- file.path(extract_dir, 'models')
pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4))
pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4), nthread = 2)
lapply(list.files(model_dir), function (x) {
model_file <- file.path(model_dir, x)
@ -87,6 +87,7 @@ test_that("Models from previous versions of XGBoost can be loaded", {
booster <- readRDS(model_file)
} else {
booster <- xgb.load(model_file)
xgb.parameters(booster) <- list(nthread = 2)
}
predict(booster, newdata = pred_data)
run_booster_check(booster, name)

View File

@ -3,8 +3,12 @@ context('Test model params and call are exposed to R')
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = 2
)
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = 2
)
bst <- xgboost(data = dtrain,
max_depth = 2,

View File

@ -4,8 +4,10 @@ set.seed(1994)
test_that("Poisson regression works", {
data(mtcars)
bst <- xgboost(data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
objective = 'count:poisson', nrounds = 10, verbose = 0)
bst <- xgboost(
data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
objective = 'count:poisson', nrounds = 10, verbose = 0, nthread = 2
)
expect_equal(class(bst), "xgb.Booster")
pred <- predict(bst, as.matrix(mtcars[, -11]))
expect_equal(length(pred), 32)

View File

@ -1,5 +1,7 @@
context('Learning to rank')
n_threads <- 2
test_that('Test ranking with unweighted data', {
X <- Matrix::sparseMatrix(
i = c(2, 3, 7, 9, 12, 15, 17, 18)
@ -9,10 +11,10 @@ test_that('Test ranking with unweighted data', {
)
y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
group <- c(5, 5, 5, 5)
dtrain <- xgb.DMatrix(X, label = y, group = group)
dtrain <- xgb.DMatrix(X, label = y, group = group, nthread = n_threads)
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
eval_metric = 'auc', eval_metric = 'aucpr')
eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
# Check if the metric is monotone increasing
expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
@ -29,10 +31,14 @@ test_that('Test ranking with weighted data', {
y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
group <- c(5, 5, 5, 5)
weight <- c(1.0, 2.0, 3.0, 4.0)
dtrain <- xgb.DMatrix(X, label = y, group = group, weight = weight)
dtrain <- xgb.DMatrix(
X, label = y, group = group, weight = weight, nthread = n_threads
)
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
eval_metric = 'auc', eval_metric = 'aucpr')
params <- list(
eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
)
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
# Check if the metric is monotone increasing
expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))

View File

@ -16,6 +16,7 @@ test_that("Can save and load models with Unicode paths", {
path <- file.path(tmpdir, x)
xgb.save(bst, path)
bst2 <- xgb.load(path)
xgb.parameters(bst2) <- list(nthread = 2)
expect_equal(predict(bst, test$data), predict(bst2, test$data))
})
})

View File

@ -2,8 +2,15 @@ context("update trees in an existing model")
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
n_threads <- 1
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
# Disable flaky tests for 32-bit Windows.
# See https://github.com/dmlc/xgboost/issues/3720
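The guard itself lives outside this hunk; as a sketch, 32-bit Windows can be detected in base R as shown below (the flag name `win32_flag` is an assumption, not taken from this diff):
```r
# Hypothetical flag: TRUE on 32-bit Windows, where the flaky tests are skipped
win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
```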
@ -14,7 +21,7 @@ test_that("updating the model works", {
# no-subsampling
p1 <- list(
objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2,
objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = n_threads,
updater = "grow_colmaker,prune"
)
set.seed(11)
@ -86,9 +93,11 @@ test_that("updating the model works", {
})
test_that("updating works for multiclass & multitree", {
dtr <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
dtr <- xgb.DMatrix(
as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
)
watchlist <- list(train = dtr)
p0 <- list(max_depth = 2, eta = 0.5, nthread = 2, subsample = 0.6,
p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
base_score = 0)
set.seed(121)

View File

@ -31,6 +31,8 @@ require(data.table)
if (!require('vcd')) {
install.packages('vcd')
}
data.table::setDTthreads(2)
```
> The **VCD** package is used only for one of its embedded datasets.
@ -297,23 +299,25 @@ test <- agaricus.test
#Random Forest - 1000 trees
bst <- xgboost(
data = train$data
, label = train$label
, max_depth = 4
, num_parallel_tree = 1000
, subsample = 0.5
, colsample_bytree = 0.5
, nrounds = 1
, objective = "binary:logistic"
data = train$data,
label = train$label,
max_depth = 4,
num_parallel_tree = 1000,
subsample = 0.5,
colsample_bytree = 0.5,
nrounds = 1,
objective = "binary:logistic",
nthread = 2
)
#Boosting - 3 rounds
bst <- xgboost(
data = train$data
, label = train$label
, max_depth = 4
, nrounds = 3
, objective = "binary:logistic"
data = train$data,
label = train$label,
max_depth = 4,
nrounds = 3,
objective = "binary:logistic",
nthread = 2
)
```

View File

@ -86,9 +86,10 @@ data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
nrounds = 2, objective = "binary:logistic")
nrounds = 2, objective = "binary:logistic", nthread = 2)
xgb.save(bst, 'model.save')
bst = xgb.load('model.save')
xgb.parameters(bst) <- list(nthread = 2)
pred <- predict(bst, test$data)
@
@ -127,7 +128,7 @@ training from initial prediction value, weighted training instance.
We can use the \verb@xgb.DMatrix@ function to construct an \verb@xgb.DMatrix@ object:
<<xgb.DMatrix>>=
dtrain <- xgb.DMatrix(train$data, label = train$label)
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)
class(dtrain)
head(getinfo(dtrain,'label'))
@
@ -161,9 +162,9 @@ evalerror <- function(preds, dtrain) {
return(list(metric = "MSE", value = err))
}
dtest <- xgb.DMatrix(test$data, label = test$label)
dtest <- xgb.DMatrix(test$data, label = test$label, nthread = 2)
watchlist <- list(eval = dtest, train = dtrain)
param <- list(max_depth = 2, eta = 1)
param <- list(max_depth = 2, eta = 1, nthread = 2)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
@

View File

@ -173,13 +173,13 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
```{r trainingDense, message=F, warning=F}
bstDense <- xgboost(
data = as.matrix(train$data)
, label = train$label
, max_depth = 2
, eta = 1
, nthread = 2
, nrounds = 2
, objective = "binary:logistic"
data = as.matrix(train$data),
label = train$label,
max_depth = 2,
eta = 1,
nthread = 2,
nrounds = 2,
objective = "binary:logistic"
)
```
@ -188,14 +188,14 @@ bstDense <- xgboost(
**XGBoost** offers a way to group them in an `xgb.DMatrix`. You can even add other metadata to it, as sketched after the code below. This will be useful for the more advanced features we will discover later.
```{r trainingDmatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
bstDMatrix <- xgboost(
data = dtrain
, max_depth = 2
, eta = 1
, nthread = 2
, nrounds = 2
, objective = "binary:logistic"
data = dtrain,
max_depth = 2,
eta = 1,
nthread = 2,
nrounds = 2,
objective = "binary:logistic"
)
```
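Metadata can also be attached after construction; a minimal sketch using `setinfo()` (the uniform weights are purely illustrative):

```{r setinfoSketch, message=F, warning=F, eval=F}
# Attach illustrative per-row weights to the DMatrix built above
w <- rep(1, length(train$label))
setinfo(dtrain, "weight", w)
```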
@ -314,8 +314,8 @@ Most of the features below have been implemented to help you to improve your mod
For the following advanced features, we need to put data in `xgb.DMatrix` as explained above.
```{r DMatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtest <- xgb.DMatrix(data = test$data, label = test$label)
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
dtest <- xgb.DMatrix(data = test$data, label = test$label, nthread = 2)
```
### Measure learning progress with xgb.train
@ -476,6 +476,7 @@ An interesting test to see how identical our saved model is to the original one
```{r loadModel, message=F, warning=F}
# load binary model to R
bst2 <- xgb.load("xgboost.model")
xgb.parameters(bst2) <- list(nthread = 2)
pred2 <- predict(bst2, test$data)
# And now the test
@ -500,6 +501,7 @@ print(class(rawVec))
# load binary model to R
bst3 <- xgb.load(rawVec)
xgb.parameters(bst3) <- list(nthread = 2)
pred3 <- predict(bst3, test$data)
# pred2 should be identical to pred

View File

@ -80,6 +80,24 @@ R package versioning
====================
See :ref:`release`.
Testing R package with different compilers
==========================================
You can change the default compiler used by R by editing the configuration file in your home
directory. For instance, to test XGBoost built with clang++ instead of g++ on Linux, put the
following in your ``~/.R/Makevars`` file:
.. code-block:: sh
CC=clang-15
CXX17=clang++-15
Be aware that the variable name must match the one used by ``R CMD``:
.. code-block:: sh
R CMD config CXX17
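To confirm from within R which compiler ``R CMD`` resolves, the same query can be issued programmatically; a small sketch using base R:

.. code-block:: r

   # Ask R CMD config for the resolved C++17 compiler
   system2("R", c("CMD", "config", "CXX17"), stdout = TRUE)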
Registering native routines in R
================================
According to `R extension manual <https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Registering-native-routines>`_,

View File

@ -50,6 +50,7 @@ inline void EllpackPageSource::Fetch() {
// Silence the warning about unused variables.
(void)(row_stride_);
(void)(is_dense_);
(void)(device_);
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)

View File

@ -106,14 +106,30 @@ void GBTreeModel::Load(dmlc::Stream* fi) {
Validate(*this);
}
namespace {
std::int32_t IOThreads(Context const* ctx) {
CHECK(ctx);
std::int32_t n_threads = ctx->Threads();
// CRAN checks the number of threads used by examples, but we might not have the right
// number of threads when serializing/unserializing models, as nthread is a booster
// parameter that only takes effect after booster initialization.
//
// CRAN's threshold for the ratio of CPU time to elapsed time is 2.5, so we cap the
// number of threads at 2.
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
n_threads = std::min(2, n_threads);
#endif
return n_threads;
}
} // namespace
void GBTreeModel::SaveModel(Json* p_out) const {
auto& out = *p_out;
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
out["gbtree_model_param"] = ToJson(param);
std::vector<Json> trees_json(trees.size());
CHECK(ctx_);
common::ParallelFor(trees.size(), ctx_->Threads(), [&](auto t) {
common::ParallelFor(trees.size(), IOThreads(ctx_), [&](auto t) {
auto const& tree = trees[t];
Json jtree{Object{}};
tree->SaveModel(&jtree);
@ -151,9 +167,7 @@ void GBTreeModel::LoadModel(Json const& in) {
CHECK_EQ(tree_info_json.size(), param.num_trees);
tree_info.resize(param.num_trees);
CHECK(ctx_);
common::ParallelFor(param.num_trees, ctx_->Threads(), [&](auto t) {
common::ParallelFor(param.num_trees, IOThreads(ctx_), [&](auto t) {
auto tree_id = get<Integer const>(trees_json[t]["id"]);
trees.at(tree_id).reset(new RegTree{});
trees[tree_id]->LoadModel(trees_json[t]);

View File

@ -3,9 +3,15 @@ import argparse
import os
import shutil
import subprocess
from io import StringIO
from pathlib import Path
from platform import system
try:
import pandas as pd
except ImportError:
pd = None
from test_utils import R_PACKAGE, ROOT, DirectoryExcursion, cd, print_time, record_time
@ -97,16 +103,47 @@ def build_rpackage(path: str) -> str:
return tarball
def check_example_timing(rcheck_dir: Path, threshold: float) -> None:
with open(rcheck_dir / "xgboost-Ex.timings", "r") as fd:
timings = fd.readlines()
newlines = [line.strip() for line in timings]
con_timings = "\n".join(newlines)
df = pd.read_csv(StringIO(con_timings), delimiter="\t")
ratio_n = "user/elapsed"
df[ratio_n] = df["user"] / df["elapsed"]
offending = df[df[ratio_n] > threshold]
try:
# requires the tabulate package
df.to_markdown("timings.md")
offending.to_markdown("offending.md")
except ImportError:
print("failed to export markdown files.")
pass
if offending.shape[0] == 0:
return
print(offending)
raise ValueError("There are examples using too many threads")
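As an aside, the same ratio check can be reproduced interactively in R; a small sketch, assuming the tab-delimited `xgboost-Ex.timings` layout read above (columns `user` and `elapsed`):
```r
# Recompute the CPU-to-elapsed ratio that the CRAN check enforces
tm <- read.delim("xgboost.Rcheck/xgboost-Ex.timings")
ratio <- tm$user / tm$elapsed
tm[ratio > 2.5, ]  # offending examples, if any
```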
@cd(ROOT)
@record_time
def check_rpackage(path: str) -> None:
env = os.environ.copy()
print("Ncpus:", f"{os.cpu_count()}")
threshold = 2.5
env.update(
{
"MAKEFLAGS": f"-j{os.cpu_count()}",
# cran specific environment variables
"_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(2.5),
"_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
"_R_CHECK_TEST_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
"_R_CHECK_VIGNETTE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
}
)
@ -118,11 +155,14 @@ def check_rpackage(path: str) -> None:
CC = os.path.join(mingw_bin, "gcc.exe")
env.update({"CC": CC, "CXX": CXX})
status = subprocess.run([R, "CMD", "check", "--as-cran", path], env=env)
with open(Path("xgboost.Rcheck") / "00check.log", "r") as fd:
status = subprocess.run(
[R, "CMD", "check", "--as-cran", "--timings", path], env=env
)
rcheck_dir = Path("xgboost.Rcheck")
with open(rcheck_dir / "00check.log", "r") as fd:
check_log = fd.read()
with open(Path("xgboost.Rcheck") / "00install.out", "r") as fd:
with open(rcheck_dir / "00install.out", "r") as fd:
install_log = fd.read()
msg = f"""
@ -144,6 +184,8 @@ def check_rpackage(path: str) -> None:
if check_log.find("Examples with CPU time") != -1:
print(msg)
raise ValueError("Suspicious NOTE.")
if pd is not None:
check_example_timing(rcheck_dir, threshold)
@cd(R_PACKAGE)
@ -264,6 +306,8 @@ def main(args: argparse.Namespace) -> None:
test_with_autotools()
else:
test_with_cmake(args)
elif args.task == "timings":
check_example_timing(Path("xgboost.Rcheck"), 2.5)
else:
raise ValueError("Unexpected task.")
@ -279,7 +323,7 @@ if __name__ == "__main__":
parser.add_argument(
"--task",
type=str,
choices=["pack", "build", "check", "doc"],
choices=["pack", "build", "check", "doc", "timings"],
default="check",
required=False,
)