[R] Set number of threads in demos and tests. (#9591)

- Restrict the number of threads in IO.
- Specify the number of threads in demos and tests.
- Add helper scripts for checks.
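
The pattern applied throughout is to pick one small thread count and pass it to every threaded component (data.table plus each xgboost entry point), rather than hard-coding nthread = 2 in some calls and leaving others unset. A minimal sketch of the pattern on toy data (data and hyperparameters are illustrative, not taken from this commit):

library(xgboost)

nthread <- 1                       # pin every component to one small count
data.table::setDTthreads(nthread)  # data.table is used by xgboost internally

x <- matrix(rnorm(400), nrow = 100)  # toy data
y <- rbinom(100, size = 1, prob = 0.5)

dtrain <- xgb.DMatrix(x, label = y, nthread = nthread)
bst <- xgb.train(
  params = list(objective = "binary:logistic", nthread = nthread),
  data = dtrain, nrounds = 2
)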
Author: Jiaming Yuan, 2023-09-23 21:44:03 +08:00 (committed by GitHub)
Parent: def77870f3
Commit: cac2cd2e94
51 changed files with 714 additions and 296 deletions


@ -4,3 +4,5 @@
^.*\.Rproj$
^\.Rproj\.user$
README.md
^doc$
^Meta$


@ -557,14 +557,18 @@ cb.cv.predict <- function(save_models = FALSE) {
#'
#' @examples
#' #### Binary classification:
#' #
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' # In the iris dataset, it is hard to linearly separate the Versicolor class from the rest
#' # without considering the 2nd order interactions:
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
#' colnames(x)
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
#' lambda = 0.0003, alpha = 0.0003, nthread = 2)
#' lambda = 0.0003, alpha = 0.0003, nthread = nthread)
#' # For 'shotgun', which is a default linear updater, using high eta values may result in
#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
@ -594,9 +598,9 @@ cb.cv.predict <- function(save_models = FALSE) {
#'
#' #### Multiclass classification:
#' #
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
#' lambda = 0.0003, alpha = 0.0003, nthread = 1)
#' lambda = 0.0003, alpha = 0.0003, nthread = nthread)
#' # For the default linear updater 'shotgun' it is sometimes helpful
#' # to use a smaller eta to reduce instability
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,


@ -267,11 +267,16 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#'
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#'
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
#' eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
#' # use all trees by default
#' pred <- predict(bst, test$data)
#' # use only the 1st tree
@ -337,8 +342,14 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
object <- xgb.Booster.complete(object, saveraw = FALSE)
if (!inherits(newdata, "xgb.DMatrix"))
newdata <- xgb.DMatrix(newdata, missing = missing, nthread = NVL(object$params[["nthread"]], -1))
if (!inherits(newdata, "xgb.DMatrix")) {
config <- jsonlite::fromJSON(xgb.config(object))
nthread <- strtoi(config$learner$generic_param$nthread)
newdata <- xgb.DMatrix(
newdata,
missing = missing, nthread = NVL(nthread, -1)
)
}
if (!is.null(object[["feature_names"]]) &&
!is.null(colnames(newdata)) &&
!identical(object[["feature_names"]], colnames(newdata)))
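
The predict() change above stops reading nthread from object$params and instead recovers it from the booster's stored JSON configuration. A minimal sketch of that lookup in isolation, using the same accessors and field path as the patch (toy model for illustration only):

library(xgboost)

data(agaricus.train, package = "xgboost")
bst <- xgboost(
  data = agaricus.train$data, label = agaricus.train$label,
  max_depth = 2, eta = 1, nthread = 1, nrounds = 1,
  objective = "binary:logistic"
)

# xgb.config() returns the booster's internal configuration as a JSON string;
# the training-time thread count lives under learner/generic_param.
config <- jsonlite::fromJSON(xgb.config(bst))
strtoi(config$learner$generic_param$nthread)  # 1L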
@ -628,10 +639,15 @@ xgb.attributes <- function(object) {
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#' train <- agaricus.train
#'
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' bst <- xgboost(
#' data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
#' )
#' config <- xgb.config(bst)
#'
#' @rdname xgb.config


@ -18,7 +18,12 @@
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#' dtrain <- with(
#' agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')


@ -22,14 +22,23 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' bst <- xgboost(
#' data = train$data, label = train$label, max_depth = 2, eta = 1,
#' nthread = nthread,
#' nrounds = 2,
#' objective = "binary:logistic"
#' )
#'
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' if (file.exists('xgb.model')) file.remove('xgb.model')
#' pred <- predict(bst, test$data)
#' @export
xgb.load <- function(modelfile) {
if (is.null(modelfile))


@ -46,9 +46,12 @@
#' # Basic use:
#'
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic")
#'
#' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
#'


@ -45,10 +45,13 @@
#' @examples
#'
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' # Change max_depth to a higher number to get a more significant result
#' ## Change max_depth to a higher number to get a more significant result
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
#' eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
#' eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
#' subsample = 0.5, min_child_weight = 2)
#'
#' xgb.plot.deepness(bst)


@ -45,9 +45,14 @@
#'
#' @examples
#' data(agaricus.train)
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' bst <- xgboost(
#' data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
#' )
#'
#' importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
#'


@ -43,10 +43,15 @@
#' @examples
#'
#' data(agaricus.train, package='xgboost')
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
#' eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
#' min_child_weight = 50, verbose = 0)
#' bst <- xgboost(
#' data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
#' eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
#' min_child_weight = 50, verbose = 0
#' )
#'
#' p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
#' print(p)


@ -74,9 +74,14 @@
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#' nrounds <- 20
#'
#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
#' eta = 0.1, max_depth = 3, subsample = .5,
#' method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
#' method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
#'
#' xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
#' contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@ -85,12 +90,11 @@
#'
#' # multiclass example - plots for each class separately:
#' nclass <- 3
#' nrounds <- 20
#' x <- as.matrix(iris[, -5])
#' set.seed(123)
#' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
#' mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
#' max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
#' max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
#' objective = "multi:softprob", num_class = nclass, verbose = 0)
#' trees0 <- seq(from=0, by=nclass, length.out=nrounds)
#' col <- rgb(0, 0, 1, 0.5)


@ -25,14 +25,22 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' bst <- xgboost(
#' data = train$data, label = train$label, max_depth = 2, eta = 1,
#' nthread = nthread,
#' nrounds = 2,
#' objective = "binary:logistic"
#' )
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' if (file.exists('xgb.model')) file.remove('xgb.model')
#' pred <- predict(bst, test$data)
#' @export
xgb.save <- function(model, fname) {
if (typeof(fname) != "character")


@ -16,13 +16,18 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic")
#'
#' raw <- xgb.save.raw(bst)
#' bst <- xgb.load.raw(raw)
#' pred <- predict(bst, test$data)
#'
#' @export
xgb.save.raw <- function(model, raw_format = "deprecated") {


@ -168,7 +168,8 @@
#' than the \code{xgboost} interface.
#'
#' Parallelization is automatically enabled if \code{OpenMP} is present.
#' Number of threads can also be manually specified via \code{nthread} parameter.
#' Number of threads can also be manually specified via the \code{nthread}
#' parameter.
#'
#' The evaluation metric is chosen automatically by XGBoost (according to the objective)
#' when the \code{eval_metric} parameter is not provided.
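
As a side note on the automatic metric choice described above, a short sketch (which metric is picked depends on the objective and the XGBoost version, so the printed name may differ):

library(xgboost)

data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(
  agaricus.train$data, label = agaricus.train$label, nthread = 1
)

# No eval_metric supplied: XGBoost derives a default from the objective
# and reports it for every entry in the watchlist.
bst <- xgb.train(
  params = list(objective = "binary:logistic", nthread = 1),
  data = dtrain, nrounds = 2, watchlist = list(train = dtrain)
)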
@ -237,17 +238,25 @@
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' dtrain <- with(
#' agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' dtest <- with(
#' agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' watchlist <- list(train = dtrain, eval = dtest)
#'
#' ## A simple xgb.train example:
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
#'
#'
#' ## An xgb.train example where custom objective and evaluation metric are used:
#' ## An xgb.train example where custom objective and evaluation metric are
#' ## used:
#' logregobj <- function(preds, dtrain) {
#' labels <- getinfo(dtrain, "label")
#' preds <- 1/(1 + exp(-preds))
@ -263,12 +272,12 @@
#'
#' # These functions could be used by passing them either:
#' # as 'objective' and 'eval_metric' parameters in the params list:
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
#' objective = logregobj, eval_metric = evalerror)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
#'
#' # or through the ... arguments:
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
#' objective = logregobj, eval_metric = evalerror)
#'
@ -278,7 +287,7 @@
#'
#'
#' ## An xgb.train example of using variable learning rates at each iteration:
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' my_etas <- list(eta = c(0.5, 0.1))
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@ -290,7 +299,7 @@
#'
#' ## An 'xgboost' interface example:
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
#' max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
#' max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
#' objective = "binary:logistic")
#' pred <- predict(bst, agaricus.test$data)
#'


@ -35,14 +35,18 @@ Callback function expects the following values to be set in its calling frame:
}
\examples{
#### Binary classification:
#
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
# In the iris dataset, it is hard to linearly separate the Versicolor class from the rest
# without considering the 2nd order interactions:
x <- model.matrix(Species ~ .^2, iris)[,-1]
colnames(x)
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
lambda = 0.0003, alpha = 0.0003, nthread = 2)
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
# For 'shotgun', which is a default linear updater, using high eta values may result in
# unstable behaviour in some datasets. With this simple dataset, however, the high learning
# rate does not break the convergence, but allows us to illustrate the typical pattern of
@ -72,9 +76,9 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
#### Multiclass classification:
#
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
lambda = 0.0003, alpha = 0.0003, nthread = 1)
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
# For the default linear updater 'shotgun' it is sometimes helpful
# to use a smaller eta to reduce instability
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,


@ -132,11 +132,16 @@ Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple th
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
# use all trees by default
pred <- predict(bst, test$data)
# use only the 1st tree


@ -38,7 +38,12 @@ Supported input file formats are either a LIBSVM text file or a binary file that
}
\examples{
data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
dtrain <- with(
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
)
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')


@ -19,10 +19,15 @@ Accessors for model parameters as JSON string.
}
\examples{
data(agaricus.train, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
)
config <- xgb.config(bst)
}


@ -27,14 +27,23 @@ not \code{xgb.load}.
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2, eta = 1,
nthread = nthread,
nrounds = 2,
objective = "binary:logistic"
)
xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
if (file.exists('xgb.model')) file.remove('xgb.model')
pred <- predict(bst, test$data)
}
\seealso{
\code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.


@ -66,9 +66,12 @@ Parse a boosted tree model text dump into a \code{data.table} structure.
# Basic use:
data(agaricus.train, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic")
(dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))


@ -61,10 +61,13 @@ This function was inspired by the blog post
\examples{
data(agaricus.train, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
# Change max_depth to a higher number to get a more significant result
## Change max_depth to a higher number to get a more significant result
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
subsample = 0.5, min_child_weight = 2)
xgb.plot.deepness(bst)


@ -77,9 +77,14 @@ with bar colors corresponding to different clusters that have somewhat similar i
}
\examples{
data(agaricus.train)
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
bst <- xgboost(
data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
)
importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)


@ -63,10 +63,15 @@ This function is inspired by this blog post:
\examples{
data(agaricus.train, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
min_child_weight = 50, verbose = 0)
bst <- xgboost(
data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
min_child_weight = 50, verbose = 0
)
p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
print(p)


@ -124,9 +124,14 @@ a meaningful thing to do.
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
nrounds <- 20
bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
eta = 0.1, max_depth = 3, subsample = .5,
method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@ -135,12 +140,11 @@ xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12) # S
# multiclass example - plots for each class separately:
nclass <- 3
nrounds <- 20
x <- as.matrix(iris[, -5])
set.seed(123)
is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
objective = "multi:softprob", num_class = nclass, verbose = 0)
trees0 <- seq(from=0, by=nclass, length.out=nrounds)
col <- rgb(0, 0, 1, 0.5)


@ -31,14 +31,22 @@ releases of XGBoost.
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2, eta = 1,
nthread = nthread,
nrounds = 2,
objective = "binary:logistic"
)
xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
if (file.exists('xgb.model')) file.remove('xgb.model')
pred <- predict(bst, test$data)
}
\seealso{
\code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.


@ -25,12 +25,17 @@ Save xgboost model from xgboost or xgb.train
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
## Keep the number of threads to 2 for examples
nthread <- 2
data.table::setDTthreads(nthread)
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic")
raw <- xgb.save.raw(bst)
bst <- xgb.load.raw(raw)
pred <- predict(bst, test$data)
}


@ -250,7 +250,8 @@ customized objective and evaluation metric functions, therefore it is more flexi
than the \code{xgboost} interface.
Parallelization is automatically enabled if \code{OpenMP} is present.
Number of threads can also be manually specified via \code{nthread} parameter.
Number of threads can also be manually specified via the \code{nthread}
parameter.
The evaluation metric is chosen automatically by XGBoost (according to the objective)
when the \code{eval_metric} parameter is not provided.
@ -286,17 +287,25 @@ The following callbacks are automatically created when certain parameters are se
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
dtrain <- with(
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
)
dtest <- with(
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
)
watchlist <- list(train = dtrain, eval = dtest)
## A simple xgb.train example:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
## An xgb.train example where custom objective and evaluation metric are used:
## An xgb.train example where custom objective and evaluation metric are
## used:
logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
preds <- 1/(1 + exp(-preds))
@ -312,12 +321,12 @@ evalerror <- function(preds, dtrain) {
# These functions could be used by passing them either:
# as 'objective' and 'eval_metric' parameters in the params list:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
objective = logregobj, eval_metric = evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
# or through the ... arguments:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
objective = logregobj, eval_metric = evalerror)
@ -327,7 +336,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
## An xgb.train example of using variable learning rates at each iteration:
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
my_etas <- list(eta = c(0.5, 0.1))
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@ -339,7 +348,7 @@ bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
## An 'xgboost' interface example:
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
objective = "binary:logistic")
pred <- predict(bst, agaricus.test$data)


@ -0,0 +1,25 @@
## Helper script for running individual examples.
library(pkgload)
library(xgboost)

files <- list.files("./man")

run_example_timeit <- function(f) {
  path <- paste("./man/", f, sep = "")
  print(paste("Test", f))
  flush.console()
  t0 <- proc.time()
  run_example(path)
  t1 <- proc.time()
  list(file = f, time = t1 - t0)
}

timings <- lapply(files, run_example_timeit)

for (t in timings) {
  ratio <- t$time[1] / t$time[3]
  if (!is.na(ratio) && !is.infinite(ratio) && ratio >= 2.5) {
    print(paste("Offending example:", t$file, ratio))
  }
}
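
For context, proc.time() returns user, system, and elapsed times, so the ratio above compares CPU time against wall-clock time: a value well above 1 means an example kept several threads busy. The script assumes it is run from the R package root so that ./man resolves. The same check in isolation (illustrative workload and threshold, not part of the commit):

t0 <- proc.time()
invisible(sum(rnorm(1e6)))         # any single-threaded workload
t1 <- proc.time()
d <- t1 - t0
d[["user.self"]] / d[["elapsed"]]  # near (or below) 1 for single-threaded code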


@ -1,23 +1,28 @@
context("basic functions")
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
train <- agaricus.train
test <- agaricus.test
set.seed(1994)
# disable some tests for Win32
windows_flag <- .Platform$OS.type == "windows" &&
.Machine$sizeof.pointer != 8
solaris_flag <- (Sys.info()['sysname'] == "SunOS")
.Machine$sizeof.pointer != 8
solaris_flag <- (Sys.info()["sysname"] == "SunOS")
n_threads <- 1
test_that("train and predict binary classification", {
nrounds <- 2
expect_output(
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic",
eval_metric = "error")
, "train-error")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = nrounds,
objective = "binary:logistic", eval_metric = "error"
),
"train-error"
)
expect_equal(class(bst), "xgb.Booster")
expect_equal(bst$niter, nrounds)
expect_false(is.null(bst$evaluation_log))
@ -46,26 +51,39 @@ test_that("parameter validation works", {
d <- cbind(
x1 = rnorm(10),
x2 = rnorm(10),
x3 = rnorm(10))
x3 = rnorm(10)
)
y <- d[, "x1"] + d[, "x2"]^2 +
ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
rnorm(10)
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
correct <- function() {
params <- list(max_depth = 2, booster = "dart",
rate_drop = 0.5, one_drop = TRUE,
objective = "reg:squarederror")
params <- list(
max_depth = 2,
booster = "dart",
rate_drop = 0.5,
one_drop = TRUE,
nthread = n_threads,
objective = "reg:squarederror"
)
xgb.train(params = params, data = dtrain, nrounds = nrounds)
}
expect_silent(correct())
incorrect <- function() {
params <- list(max_depth = 2, booster = "dart",
rate_drop = 0.5, one_drop = TRUE,
objective = "reg:squarederror",
foo = "bar", bar = "foo")
params <- list(
max_depth = 2,
booster = "dart",
rate_drop = 0.5,
one_drop = TRUE,
objective = "reg:squarederror",
nthread = n_threads,
foo = "bar",
bar = "foo"
)
output <- capture.output(
xgb.train(params = params, data = dtrain, nrounds = nrounds))
xgb.train(params = params, data = dtrain, nrounds = nrounds)
)
print(output)
}
expect_output(incorrect(), '\\\\"bar\\\\", \\\\"foo\\\\"')
@ -79,7 +97,8 @@ test_that("dart prediction works", {
d <- cbind(
x1 = rnorm(100),
x2 = rnorm(100),
x3 = rnorm(100))
x3 = rnorm(100)
)
y <- d[, "x1"] + d[, "x2"]^2 +
ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
rnorm(100)
@ -93,7 +112,7 @@ test_that("dart prediction works", {
rate_drop = 0.5,
one_drop = TRUE,
eta = 1,
nthread = 2,
nthread = n_threads,
nrounds = nrounds,
objective = "reg:squarederror"
)
@ -105,7 +124,7 @@ test_that("dart prediction works", {
expect_false(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))
set.seed(1994)
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
booster_by_train <- xgb.train(
params = list(
booster = "dart",
@ -113,7 +132,7 @@ test_that("dart prediction works", {
eta = 1,
rate_drop = 0.5,
one_drop = TRUE,
nthread = 1,
nthread = n_threads,
objective = "reg:squarederror"
),
data = dtrain,
@ -132,10 +151,13 @@ test_that("train and predict softprob", {
lb <- as.numeric(iris$Species) - 1
set.seed(11)
expect_output(
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
objective = "multi:softprob", num_class = 3, eval_metric = "merror")
, "train-merror")
bst <- xgboost(
data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softprob", num_class = 3, eval_metric = "merror"
),
"train-merror"
)
expect_false(is.null(bst$evaluation_log))
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
expect_equal(bst$niter * 3, xgb.ntree(bst))
@ -164,9 +186,10 @@ test_that("train and predict softprob", {
x3 = rnorm(100)
)
y <- sample.int(10, 100, replace = TRUE) - 1
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
booster <- xgb.train(
params = list(tree_method = "hist"), data = dtrain, nrounds = 4, num_class = 10,
params = list(tree_method = "hist", nthread = n_threads),
data = dtrain, nrounds = 4, num_class = 10,
objective = "multi:softprob"
)
predt <- predict(booster, as.matrix(d), reshape = TRUE, strict_shape = FALSE)
@ -178,10 +201,13 @@ test_that("train and predict softmax", {
lb <- as.numeric(iris$Species) - 1
set.seed(11)
expect_output(
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
objective = "multi:softmax", num_class = 3, eval_metric = "merror")
, "train-merror")
bst <- xgboost(
data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softmax", num_class = 3, eval_metric = "merror"
),
"train-merror"
)
expect_false(is.null(bst$evaluation_log))
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
expect_equal(bst$niter * 3, xgb.ntree(bst))
@ -196,16 +222,19 @@ test_that("train and predict RF", {
set.seed(11)
lb <- train$label
# single iteration
bst <- xgboost(data = train$data, label = lb, max_depth = 5,
nthread = 2, nrounds = 1, objective = "binary:logistic", eval_metric = "error",
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
bst <- xgboost(
data = train$data, label = lb, max_depth = 5,
nthread = n_threads,
nrounds = 1, objective = "binary:logistic", eval_metric = "error",
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1
)
expect_equal(bst$niter, 1)
expect_equal(xgb.ntree(bst), 20)
pred <- predict(bst, train$data)
pred_err <- sum((pred > 0.5) != lb) / length(lb)
expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
#expect_lt(pred_err, 0.03)
# expect_lt(pred_err, 0.03)
pred <- predict(bst, train$data, ntreelimit = 20)
pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
@ -219,11 +248,13 @@ test_that("train and predict RF with softprob", {
lb <- as.numeric(iris$Species) - 1
nrounds <- 15
set.seed(11)
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds,
objective = "multi:softprob", eval_metric = "merror",
num_class = 3, verbose = 0,
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5)
bst <- xgboost(
data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.9, nthread = n_threads, nrounds = nrounds,
objective = "multi:softprob", eval_metric = "merror",
num_class = 3, verbose = 0,
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5
)
expect_equal(bst$niter, 15)
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
# predict for all iterations:
@ -240,18 +271,24 @@ test_that("train and predict RF with softprob", {
test_that("use of multiple eval metrics works", {
expect_output(
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
, "train-error.*train-auc.*train-logloss")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
),
"train-error.*train-auc.*train-logloss"
)
expect_false(is.null(bst$evaluation_log))
expect_equal(dim(bst$evaluation_log), c(2, 4))
expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
expect_output(
bst2 <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
eval_metric = list("error", "auc", "logloss"))
, "train-error.*train-auc.*train-logloss")
bst2 <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = list("error", "auc", "logloss")
),
"train-error.*train-auc.*train-logloss"
)
expect_false(is.null(bst2$evaluation_log))
expect_equal(dim(bst2$evaluation_log), c(2, 4))
expect_equal(colnames(bst2$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
@ -259,9 +296,11 @@ test_that("use of multiple eval metrics works", {
test_that("training continuation works", {
dtrain <- xgb.DMatrix(train$data, label = train$label)
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
param <- list(
objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
)
# for the reference, use 4 iterations at once:
set.seed(11)
@ -271,30 +310,33 @@ test_that("training continuation works", {
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
# continue for two more:
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
if (!windows_flag && !solaris_flag)
if (!windows_flag && !solaris_flag) {
expect_equal(bst$raw, bst2$raw)
}
expect_false(is.null(bst2$evaluation_log))
expect_equal(dim(bst2$evaluation_log), c(4, 2))
expect_equal(bst2$evaluation_log, bst$evaluation_log)
# test continuing from raw model data
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
if (!windows_flag && !solaris_flag)
if (!windows_flag && !solaris_flag) {
expect_equal(bst$raw, bst2$raw)
}
expect_equal(dim(bst2$evaluation_log), c(2, 2))
# test continuing from a model in file
xgb.save(bst1, "xgboost.json")
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.json")
if (!windows_flag && !solaris_flag)
if (!windows_flag && !solaris_flag) {
expect_equal(bst$raw, bst2$raw)
}
expect_equal(dim(bst2$evaluation_log), c(2, 2))
file.remove("xgboost.json")
})
test_that("model serialization works", {
out_path <- "model_serialization"
dtrain <- xgb.DMatrix(train$data, label = train$label)
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic")
param <- list(objective = "binary:logistic", nthread = n_threads)
booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
raw <- xgb.serialize(booster)
saveRDS(raw, out_path)
@ -309,11 +351,14 @@ test_that("model serialization works", {
test_that("xgb.cv works", {
set.seed(11)
expect_output(
cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5,
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", verbose = TRUE)
, "train-error:")
expect_is(cv, 'xgb.cv.synchronous')
cv <- xgb.cv(
data = train$data, label = train$label, max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", verbose = TRUE
),
"train-error:"
)
expect_is(cv, "xgb.cv.synchronous")
expect_false(is.null(cv$evaluation_log))
expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)
expect_lt(cv$evaluation_log[, min(test_error_std)], 0.008)
@ -326,15 +371,19 @@ test_that("xgb.cv works", {
})
test_that("xgb.cv works with stratified folds", {
dtrain <- xgb.DMatrix(train$data, label = train$label)
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
set.seed(314159)
cv <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
verbose = TRUE, stratified = FALSE)
cv <- xgb.cv(
data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = TRUE, stratified = FALSE
)
set.seed(314159)
cv2 <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
verbose = TRUE, stratified = TRUE)
cv2 <- xgb.cv(
data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = TRUE, stratified = TRUE
)
# Stratified folds should result in different evaluation logs
expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
})
@ -342,40 +391,57 @@ test_that("xgb.cv works with stratified folds", {
test_that("train and predict with non-strict classes", {
# standard dense matrix input
train_dense <- as.matrix(train$data)
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
bst <- xgboost(
data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = 0
)
pr0 <- predict(bst, train_dense)
# dense matrix-like input of non-matrix class
class(train_dense) <- 'shmatrix'
class(train_dense) <- "shmatrix"
expect_true(is.matrix(train_dense))
expect_error(
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
, regexp = NA)
bst <- xgboost(
data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = 0
),
regexp = NA
)
expect_error(pr <- predict(bst, train_dense), regexp = NA)
expect_equal(pr0, pr)
# dense matrix-like input of non-matrix class with some inheritance
class(train_dense) <- c('pphmatrix', 'shmatrix')
class(train_dense) <- c("pphmatrix", "shmatrix")
expect_true(is.matrix(train_dense))
expect_error(
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
, regexp = NA)
bst <- xgboost(
data = train_dense, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = 0
),
regexp = NA
)
expect_error(pr <- predict(bst, train_dense), regexp = NA)
expect_equal(pr0, pr)
# when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster
class(bst) <- c('super.Booster', 'xgb.Booster')
class(bst) <- c("super.Booster", "xgb.Booster")
expect_error(pr <- predict(bst, train_dense), regexp = NA)
expect_equal(pr0, pr)
})
test_that("max_delta_step works", {
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
param <- list(
objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
nthread = n_threads,
eta = 0.5
)
nrounds <- 5
# model with no restriction on max_delta_step
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
@ -395,14 +461,16 @@ test_that("colsample_bytree works", {
test_y <- as.numeric(rowSums(test_x) > 0)
colnames(train_x) <- paste0("Feature_", sprintf("%03d", 1:100))
colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
dtrain <- xgb.DMatrix(train_x, label = train_y)
dtest <- xgb.DMatrix(test_x, label = test_y)
dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
watchlist <- list(train = dtrain, eval = dtest)
## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
## each tree
param <- list(max_depth = 2, eta = 0, nthread = 2,
colsample_bytree = 0.01, objective = "binary:logistic",
eval_metric = "auc")
param <- list(
max_depth = 2, eta = 0, nthread = n_threads,
colsample_bytree = 0.01, objective = "binary:logistic",
eval_metric = "auc"
)
set.seed(2)
bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
xgb.importance(model = bst)
@ -412,9 +480,11 @@ test_that("colsample_bytree works", {
})
test_that("Configuration works", {
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
)
config <- xgb.config(bst)
xgb.config(bst) <- config
reloaded_config <- xgb.config(bst)
@ -451,22 +521,26 @@ test_that("strict_shape works", {
y <- as.numeric(iris$Species) - 1
X <- as.matrix(iris[, -5])
bst <- xgboost(data = X, label = y,
max_depth = 2, nrounds = n_rounds,
objective = "multi:softprob", num_class = 3, eval_metric = "merror")
bst <- xgboost(
data = X, label = y,
max_depth = 2, nrounds = n_rounds, nthread = n_threads,
objective = "multi:softprob", num_class = 3, eval_metric = "merror"
)
test_strict_shape(bst, X, 3)
}
test_agaricus <- function() {
data(agaricus.train, package = 'xgboost')
data(agaricus.train, package = "xgboost")
X <- agaricus.train$data
y <- agaricus.train$label
bst <- xgboost(data = X, label = y, max_depth = 2,
nrounds = n_rounds, objective = "binary:logistic",
eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
bst <- xgboost(
data = X, label = y, max_depth = 2, nthread = n_threads,
nrounds = n_rounds, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
)
test_strict_shape(bst, X, 1)
}
@ -481,8 +555,10 @@ test_that("'predict' accepts CSR data", {
x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix")
x_csr <- as(x_csc, "RsparseMatrix")
x_spv <- as(x_csc, "sparseVector")
bst <- xgboost(data = X, label = y, objective = "binary:logistic",
nrounds = 5L, verbose = FALSE)
bst <- xgboost(
data = X, label = y, objective = "binary:logistic",
nrounds = 5L, verbose = FALSE, nthread = n_threads
)
p_csc <- predict(bst, x_csc)
p_csr <- predict(bst, x_csr)
p_spv <- predict(bst, x_spv)


@ -6,6 +6,8 @@ data(agaricus.test, package = 'xgboost')
train <- agaricus.train
test <- agaricus.test
n_threads <- 2
# add some label noise for early stopping tests
add.noise <- function(label, frac) {
inoise <- sample(length(label), length(label) * frac)
@ -15,15 +17,15 @@ add.noise <- function(label, frac) {
set.seed(11)
ltrain <- add.noise(train$label, 0.2)
ltest <- add.noise(test$label, 0.2)
dtrain <- xgb.DMatrix(train$data, label = ltrain)
dtest <- xgb.DMatrix(test$data, label = ltest)
dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
watchlist <- list(train = dtrain, test = dtest)
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
param <- list(objective = "binary:logistic", eval_metric = "error",
max_depth = 2, nthread = 2)
max_depth = 2, nthread = n_threads)
test_that("cb.print.evaluation works as expected", {
@ -103,7 +105,7 @@ test_that("cb.evaluation.log works as expected", {
param <- list(objective = "binary:logistic", eval_metric = "error",
max_depth = 4, nthread = 2)
max_depth = 4, nthread = n_threads)
test_that("can store evaluation_log without printing", {
expect_silent(
@ -179,8 +181,10 @@ test_that("cb.save.model works as expected", {
expect_true(file.exists('xgboost_01.json'))
expect_true(file.exists('xgboost_02.json'))
b1 <- xgb.load('xgboost_01.json')
xgb.parameters(b1) <- list(nthread = 2)
expect_equal(xgb.ntree(b1), 1)
b2 <- xgb.load('xgboost_02.json')
xgb.parameters(b2) <- list(nthread = 2)
expect_equal(xgb.ntree(b2), 2)
xgb.config(b2) <- xgb.config(bst)
@ -267,7 +271,8 @@ test_that("early stopping works with titanic", {
objective = "binary:logistic",
eval_metric = "auc",
nrounds = 100,
early_stopping_rounds = 3
early_stopping_rounds = 3,
nthread = n_threads
)
expect_true(TRUE) # should not crash
@ -308,7 +313,7 @@ test_that("prediction in xgb.cv works", {
test_that("prediction in xgb.cv works for gblinear too", {
set.seed(11)
p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = 2)
p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads)
cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0)
expect_false(is.null(cv$evaluation_log))
expect_false(is.null(cv$pred))
@ -341,7 +346,7 @@ test_that("prediction in xgb.cv for softprob works", {
set.seed(11)
expect_warning(
cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4,
eta = 0.5, nrounds = 5, max_depth = 3, nthread = 2,
eta = 0.5, nrounds = 5, max_depth = 3, nthread = n_threads,
subsample = 0.8, gamma = 2, verbose = 0,
prediction = TRUE, objective = "multi:softprob", num_class = 3)
, NA)


@ -2,10 +2,16 @@ context('Test models with custom objective')
set.seed(1994)
n_threads <- 2
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
watchlist <- list(eval = dtest, train = dtrain)
logregobj <- function(preds, dtrain) {
@ -22,7 +28,7 @@ evalerror <- function(preds, dtrain) {
return(list(metric = "error", value = err))
}
param <- list(max_depth = 2, eta = 1, nthread = 2,
param <- list(max_depth = 2, eta = 1, nthread = n_threads,
objective = logregobj, eval_metric = evalerror)
num_round <- 2
@ -67,7 +73,7 @@ test_that("custom objective using DMatrix attr works", {
test_that("custom objective with multi-class shape", {
data <- as.matrix(iris[, -5])
label <- as.numeric(iris$Species) - 1
dtrain <- xgb.DMatrix(data = data, label = label)
dtrain <- xgb.DMatrix(data = data, label = label, nthread = n_threads)
n_classes <- 3
fake_softprob <- function(preds, dtrain) {


@ -5,19 +5,21 @@ data(agaricus.test, package = "xgboost")
test_data <- agaricus.test$data[1:100, ]
test_label <- agaricus.test$label[1:100]
n_threads <- 2
test_that("xgb.DMatrix: basic construction", {
# from sparse matrix
dtest1 <- xgb.DMatrix(test_data, label = test_label)
dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
# from dense matrix
dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label)
dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label, nthread = n_threads)
expect_equal(getinfo(dtest1, "label"), getinfo(dtest2, "label"))
expect_equal(dim(dtest1), dim(dtest2))
# from dense integer matrix
int_data <- as.matrix(test_data)
storage.mode(int_data) <- "integer"
dtest3 <- xgb.DMatrix(int_data, label = test_label)
dtest3 <- xgb.DMatrix(int_data, label = test_label, nthread = n_threads)
expect_equal(dim(dtest1), dim(dtest3))
n_samples <- 100
@ -29,15 +31,15 @@ test_that("xgb.DMatrix: basic construction", {
X <- matrix(X, nrow = n_samples)
y <- rbinom(n = n_samples, size = 1, prob = 1 / 2)
fd <- xgb.DMatrix(X, label = y, missing = 1)
fd <- xgb.DMatrix(X, label = y, missing = 1, nthread = n_threads)
dgc <- as(X, "dgCMatrix")
fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0)
fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0, nthread = n_threads)
dgr <- as(X, "dgRMatrix")
fdgr <- xgb.DMatrix(dgr, label = y, missing = 1)
fdgr <- xgb.DMatrix(dgr, label = y, missing = 1, nthread = n_threads)
params <- list(tree_method = "hist")
params <- list(tree_method = "hist", nthread = n_threads)
bst_fd <- xgb.train(
params, nrounds = 8, fd, watchlist = list(train = fd)
)
@ -64,12 +66,12 @@ test_that("xgb.DMatrix: NA", {
)
x[1, "x1"] <- NA
m <- xgb.DMatrix(x)
m <- xgb.DMatrix(x, nthread = n_threads)
xgb.DMatrix.save(m, "int.dmatrix")
x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
colnames(x) <- c("x1", "x2")
m <- xgb.DMatrix(x)
m <- xgb.DMatrix(x, nthread = n_threads)
xgb.DMatrix.save(m, "float.dmatrix")
@ -94,7 +96,7 @@ test_that("xgb.DMatrix: NA", {
test_that("xgb.DMatrix: saving, loading", {
# save to a local file
dtest1 <- xgb.DMatrix(test_data, label = test_label)
dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
tmp_file <- tempfile('xgb.DMatrix_')
on.exit(unlink(tmp_file))
expect_true(xgb.DMatrix.save(dtest1, tmp_file))
@ -109,13 +111,17 @@ test_that("xgb.DMatrix: saving, loading", {
tmp_file <- tempfile(fileext = ".libsvm")
writeLines(tmp, tmp_file)
expect_true(file.exists(tmp_file))
dtest4 <- xgb.DMatrix(paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE)
dtest4 <- xgb.DMatrix(
paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE, nthread = n_threads
)
expect_equal(dim(dtest4), c(3, 4))
expect_equal(getinfo(dtest4, 'label'), c(0, 1, 0))
# check that feature info is saved
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
dtrain <- xgb.DMatrix(
data = agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
cnames <- colnames(dtrain)
expect_equal(length(cnames), 126)
tmp_file <- tempfile('xgb.DMatrix_')
@ -129,7 +135,7 @@ test_that("xgb.DMatrix: saving, loading", {
})
test_that("xgb.DMatrix: getinfo & setinfo", {
dtest <- xgb.DMatrix(test_data)
dtest <- xgb.DMatrix(test_data, nthread = n_threads)
expect_true(setinfo(dtest, 'label', test_label))
labels <- getinfo(dtest, 'label')
expect_equal(test_label, getinfo(dtest, 'label'))
@ -156,7 +162,7 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
})
test_that("xgb.DMatrix: slice, dim", {
dtest <- xgb.DMatrix(test_data, label = test_label)
dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
expect_equal(dim(dtest), dim(test_data))
dsub1 <- slice(dtest, 1:42)
expect_equal(nrow(dsub1), 42)
@ -171,16 +177,20 @@ test_that("xgb.DMatrix: slice, trailing empty rows", {
data(agaricus.train, package = 'xgboost')
train_data <- agaricus.train$data
train_label <- agaricus.train$label
dtrain <- xgb.DMatrix(data = train_data, label = train_label)
dtrain <- xgb.DMatrix(
data = train_data, label = train_label, nthread = n_threads
)
slice(dtrain, 6513L)
train_data[6513, ] <- 0
dtrain <- xgb.DMatrix(data = train_data, label = train_label)
dtrain <- xgb.DMatrix(
data = train_data, label = train_label, nthread = n_threads
)
slice(dtrain, 6513L)
expect_equal(nrow(dtrain), 6513)
})
test_that("xgb.DMatrix: colnames", {
dtest <- xgb.DMatrix(test_data, label = test_label)
dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
expect_equal(colnames(dtest), colnames(test_data))
expect_error(colnames(dtest) <- 'asdf')
new_names <- make.names(seq_len(ncol(test_data)))
@ -196,7 +206,7 @@ test_that("xgb.DMatrix: nrow is correct for a very sparse matrix", {
x <- Matrix::rsparsematrix(nr, 100, density = 0.0005)
# we want it very sparse, so that last rows are empty
expect_lt(max(x@i), nr)
dtest <- xgb.DMatrix(x)
dtest <- xgb.DMatrix(x, nthread = n_threads)
expect_equal(dim(dtest), dim(x))
})
@ -205,8 +215,8 @@ test_that("xgb.DMatrix: print", {
# core DMatrix with just data and labels
dtrain <- xgb.DMatrix(
data = agaricus.train$data
, label = agaricus.train$label
data = agaricus.train$data, label = agaricus.train$label,
nthread = n_threads
)
txt <- capture.output({
print(dtrain)
@ -222,10 +232,11 @@ test_that("xgb.DMatrix: print", {
# DMatrix with weights and base_margin
dtrain <- xgb.DMatrix(
data = agaricus.train$data
, label = agaricus.train$label
, weight = seq_along(agaricus.train$label)
, base_margin = agaricus.train$label
data = agaricus.train$data,
label = agaricus.train$label,
weight = seq_along(agaricus.train$label),
base_margin = agaricus.train$label,
nthread = n_threads
)
txt <- capture.output({
print(dtrain)
@ -234,7 +245,8 @@ test_that("xgb.DMatrix: print", {
# DMatrix with just features
dtrain <- xgb.DMatrix(
data = agaricus.train$data
data = agaricus.train$data,
nthread = n_threads
)
txt <- capture.output({
print(dtrain)
@ -245,7 +257,8 @@ test_that("xgb.DMatrix: print", {
data_no_colnames <- agaricus.train$data
colnames(data_no_colnames) <- NULL
dtrain <- xgb.DMatrix(
data = data_no_colnames
data = data_no_colnames,
nthread = n_threads
)
txt <- capture.output({
print(dtrain)


@ -1,5 +1,7 @@
context("feature weights")
n_threads <- 2
test_that("training with feature weights works", {
nrows <- 1000
ncols <- 9
@ -10,8 +12,12 @@ test_that("training with feature weights works", {
test <- function(tm) {
names <- paste0("f", 1:ncols)
xy <- xgb.DMatrix(data = x, label = y, feature_weights = weights)
params <- list(colsample_bynode = 0.4, tree_method = tm, nthread = 1)
xy <- xgb.DMatrix(
data = x, label = y, feature_weights = weights, nthread = n_threads
)
params <- list(
colsample_bynode = 0.4, tree_method = tm, nthread = n_threads
)
model <- xgb.train(params = params, data = xy, nrounds = 32)
importance <- xgb.importance(model = model, feature_names = names)
expect_equal(dim(importance), c(ncols, 4))


@ -1,13 +1,19 @@
context('Test generalized linear models')
n_threads <- 2
test_that("gblinear works", {
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
watchlist <- list(eval = dtest, train = dtrain)
n <- 5 # iterations
@ -48,12 +54,16 @@ test_that("gblinear works", {
test_that("gblinear early stopping works", {
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
param <- list(
objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
updater = "coord_descent"
)

View File

@ -171,6 +171,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
fit <- xgboost(
params = c(
list(
nthread = 2,
booster = booster,
objective = "reg:squarederror",
eval_metric = "rmse"),
@ -257,7 +258,7 @@ test_that("xgb.Booster serializing as R object works", {
.skip_if_vcd_not_available()
saveRDS(bst.Tree, 'xgb.model.rds')
bst <- readRDS('xgb.model.rds')
dtrain <- xgb.DMatrix(sparse_matrix, label = label)
dtrain <- xgb.DMatrix(sparse_matrix, label = label, nthread = 2)
expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
xgb.save(bst, 'xgb.model')
@ -363,7 +364,8 @@ test_that("xgb.importance works with and without feature names", {
data = as.matrix(data.frame(x = c(0, 1))),
label = c(1, 2),
nrounds = 1,
base_score = 0.5
base_score = 0.5,
nthread = 2
)
df <- xgb.model.dt.tree(model = m)
expect_equal(df$Feature, "Leaf")

View File

@ -2,6 +2,8 @@ require(xgboost)
context("interaction constraints")
n_threads <- 2
set.seed(1024)
x1 <- rnorm(1000, 1)
x2 <- rnorm(1000, 1)
@ -45,11 +47,18 @@ test_that("interaction constraints scientific representation", {
d <- matrix(rexp(rows, rate = .1), nrow = rows, ncol = cols)
y <- rnorm(rows)
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
inc <- list(c(seq.int(from = 0, to = cols, by = 1)))
with_inc <- xgb.train(data = dtrain, tree_method = 'hist',
interaction_constraints = inc, nrounds = 10)
without_inc <- xgb.train(data = dtrain, tree_method = 'hist', nrounds = 10)
with_inc <- xgb.train(
data = dtrain,
tree_method = 'hist',
interaction_constraints = inc,
nrounds = 10,
nthread = n_threads
)
without_inc <- xgb.train(
data = dtrain, tree_method = 'hist', nrounds = 10, nthread = n_threads
)
expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))
})

View File

@ -1,6 +1,7 @@
context('Test prediction of feature interactions')
set.seed(123)
n_threads <- 2
test_that("predict feature interactions works", {
# simulate some binary data and a linear outcome with an interaction term
@ -19,8 +20,10 @@ test_that("predict feature interactions works", {
y <- f_int(X)
dm <- xgb.DMatrix(X, label = y)
param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
dm <- xgb.DMatrix(X, label = y, nthread = n_threads)
param <- list(
eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = n_threads
)
b <- xgb.train(param, dm, 100)
pred <- predict(b, dm, outputmargin = TRUE)
@ -99,11 +102,13 @@ test_that("SHAP contribution values are not NAN", {
verbose = 0,
params = list(
objective = "reg:squarederror",
eval_metric = "rmse"),
eval_metric = "rmse",
nthread = n_threads
),
data = as.matrix(subset(d, fold == 2)[, ivs]),
label = subset(d, fold == 2)$y,
nthread = 1,
nrounds = 3)
nrounds = 3
)
shaps <- as.data.frame(predict(fit,
newdata = as.matrix(subset(d, fold == 1)[, ivs]),
@ -116,8 +121,12 @@ test_that("SHAP contribution values are not NAN", {
test_that("multiclass feature interactions work", {
dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
dm <- xgb.DMatrix(
as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
)
param <- list(
eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3, nthread = n_threads
)
b <- xgb.train(param, dm, 40)
pred <- t(
array(
@ -166,6 +175,7 @@ test_that("SHAP single sample works", {
max_depth = 2,
nrounds = 4,
objective = "binary:logistic",
nthread = n_threads
)
predt <- predict(

View File

@ -9,7 +9,8 @@ test_that("load/save raw works", {
nrounds <- 8
booster <- xgboost(
data = train$data, label = train$label,
nrounds = nrounds, objective = "binary:logistic"
nrounds = nrounds, objective = "binary:logistic",
nthread = 2
)
json_bytes <- xgb.save.raw(booster, raw_format = "json")

View File

@ -66,7 +66,7 @@ test_that("Models from previous versions of XGBoost can be loaded", {
unzip(zipfile, exdir = extract_dir, overwrite = TRUE)
model_dir <- file.path(extract_dir, 'models')
pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4))
pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4), nthread = 2)
lapply(list.files(model_dir), function (x) {
model_file <- file.path(model_dir, x)
@ -87,6 +87,7 @@ test_that("Models from previous versions of XGBoost can be loaded", {
booster <- readRDS(model_file)
} else {
booster <- xgb.load(model_file)
xgb.parameters(booster) <- list(nthread = 2)
}
predict(booster, newdata = pred_data)
run_booster_check(booster, name)

View File

@ -3,8 +3,12 @@ context('Test model params and call are exposed to R')
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = 2
)
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = 2
)
bst <- xgboost(data = dtrain,
max_depth = 2,

View File

@ -4,8 +4,10 @@ set.seed(1994)
test_that("Poisson regression works", {
data(mtcars)
bst <- xgboost(data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
objective = 'count:poisson', nrounds = 10, verbose = 0)
bst <- xgboost(
data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
objective = 'count:poisson', nrounds = 10, verbose = 0, nthread = 2
)
expect_equal(class(bst), "xgb.Booster")
pred <- predict(bst, as.matrix(mtcars[, -11]))
expect_equal(length(pred), 32)

View File

@ -1,5 +1,7 @@
context('Learning to rank')
n_threads <- 2
test_that('Test ranking with unweighted data', {
X <- Matrix::sparseMatrix(
i = c(2, 3, 7, 9, 12, 15, 17, 18)
@ -9,10 +11,10 @@ test_that('Test ranking with unweighted data', {
)
y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
group <- c(5, 5, 5, 5)
dtrain <- xgb.DMatrix(X, label = y, group = group)
dtrain <- xgb.DMatrix(X, label = y, group = group, nthread = n_threads)
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
eval_metric = 'auc', eval_metric = 'aucpr')
eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
# Check if the metric is monotone increasing
expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
@ -29,10 +31,14 @@ test_that('Test ranking with weighted data', {
y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
group <- c(5, 5, 5, 5)
weight <- c(1.0, 2.0, 3.0, 4.0)
dtrain <- xgb.DMatrix(X, label = y, group = group, weight = weight)
dtrain <- xgb.DMatrix(
X, label = y, group = group, weight = weight, nthread = n_threads
)
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
eval_metric = 'auc', eval_metric = 'aucpr')
params <- list(
eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
)
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
# Check if the metric is monotone increasing
expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))

View File

@ -16,6 +16,7 @@ test_that("Can save and load models with Unicode paths", {
path <- file.path(tmpdir, x)
xgb.save(bst, path)
bst2 <- xgb.load(path)
xgb.parameters(bst2) <- list(nthread = 2)
expect_equal(predict(bst, test$data), predict(bst2, test$data))
})
})

View File

@ -2,8 +2,15 @@ context("update trees in an existing model")
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
n_threads <- 1
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
# Disable flaky tests for 32-bit Windows.
# See https://github.com/dmlc/xgboost/issues/3720
@ -14,7 +21,7 @@ test_that("updating the model works", {
# no-subsampling
p1 <- list(
objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2,
objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = n_threads,
updater = "grow_colmaker,prune"
)
set.seed(11)
@ -86,9 +93,11 @@ test_that("updating the model works", {
})
test_that("updating works for multiclass & multitree", {
dtr <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
dtr <- xgb.DMatrix(
as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
)
watchlist <- list(train = dtr)
p0 <- list(max_depth = 2, eta = 0.5, nthread = 2, subsample = 0.6,
p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
base_score = 0)
set.seed(121)

View File

@ -31,6 +31,8 @@ require(data.table)
if (!require('vcd')) {
install.packages('vcd')
}
data.table::setDTthreads(2)
```
> The **VCD** package is used only for one of its embedded datasets.
@ -297,23 +299,25 @@ test <- agaricus.test
#Random Forest - 1000 trees
bst <- xgboost(
data = train$data
, label = train$label
, max_depth = 4
, num_parallel_tree = 1000
, subsample = 0.5
, colsample_bytree = 0.5
, nrounds = 1
, objective = "binary:logistic"
data = train$data,
label = train$label,
max_depth = 4,
num_parallel_tree = 1000,
subsample = 0.5,
colsample_bytree = 0.5,
nrounds = 1,
objective = "binary:logistic",
nthread = 2
)
#Boosting - 3 rounds
bst <- xgboost(
data = train$data
, label = train$label
, max_depth = 4
, nrounds = 3
, objective = "binary:logistic"
data = train$data,
label = train$label,
max_depth = 4,
nrounds = 3,
objective = "binary:logistic",
nthread = 2
)
```

View File

@ -86,9 +86,10 @@ data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
nrounds = 2, objective = "binary:logistic")
nrounds = 2, objective = "binary:logistic", nthread = 2)
xgb.save(bst, 'model.save')
bst = xgb.load('model.save')
xgb.parameters(bst) <- list(nthread = 2)
pred <- predict(bst, test$data)
@
@ -127,7 +128,7 @@ training from initial prediction value, weighted training instance.
We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
<<xgb.DMatrix>>=
dtrain <- xgb.DMatrix(train$data, label = train$label)
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)
class(dtrain)
head(getinfo(dtrain,'label'))
@
@ -161,9 +162,9 @@ evalerror <- function(preds, dtrain) {
return(list(metric = "MSE", value = err))
}
dtest <- xgb.DMatrix(test$data, label = test$label)
dtest <- xgb.DMatrix(test$data, label = test$label, nthread = 2)
watchlist <- list(eval = dtest, train = dtrain)
param <- list(max_depth = 2, eta = 1)
param <- list(max_depth = 2, eta = 1, nthread = 2)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
@

View File

@ -173,13 +173,13 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
```{r trainingDense, message=F, warning=F}
bstDense <- xgboost(
data = as.matrix(train$data)
, label = train$label
, max_depth = 2
, eta = 1
, nthread = 2
, nrounds = 2
, objective = "binary:logistic"
data = as.matrix(train$data),
label = train$label,
max_depth = 2,
eta = 1,
nthread = 2,
nrounds = 2,
objective = "binary:logistic"
)
```
@ -188,14 +188,14 @@ bstDense <- xgboost(
**XGBoost** offers a way to group them in a `xgb.DMatrix`. You can even add other meta data in it. It will be useful for the most advanced features we will discover later.
```{r trainingDmatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
bstDMatrix <- xgboost(
data = dtrain
, max_depth = 2
, eta = 1
, nthread = 2
, nrounds = 2
, objective = "binary:logistic"
data = dtrain,
max_depth = 2,
eta = 1,
nthread = 2,
nrounds = 2,
objective = "binary:logistic"
)
```
@ -314,8 +314,8 @@ Most of the features below have been implemented to help you to improve your mod
For the following advanced features, we need to put data in `xgb.DMatrix` as explained above.
```{r DMatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtest <- xgb.DMatrix(data = test$data, label = test$label)
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
dtest <- xgb.DMatrix(data = test$data, label = test$label, nthread = 2)
```
### Measure learning progress with xgb.train
@ -476,6 +476,7 @@ An interesting test to see how identical our saved model is to the original one
```{r loadModel, message=F, warning=F}
# load binary model to R
bst2 <- xgb.load("xgboost.model")
xgb.parameters(bst2) <- list(nthread = 2)
pred2 <- predict(bst2, test$data)
# And now the test
@ -500,6 +501,7 @@ print(class(rawVec))
# load binary model to R
bst3 <- xgb.load(rawVec)
xgb.parameters(bst3) <- list(nthread = 2)
pred3 <- predict(bst3, test$data)
# pred2 should be identical to pred

View File

@ -175,7 +175,7 @@ bst_preds == bst_from_json_preds
None are exactly equal again. What is going on here? Since we use the value `1` in the calculations, we have introduced a double: all float values are promoted to 64-bit doubles, and the 64-bit version of the exponentiation operator `exp` is used as well. xgboost, on the other hand, uses the 32-bit version of the exponentiation operator in its [sigmoid function](https://github.com/dmlc/xgboost/blob/54980b8959680a0da06a3fc0ec776e47c8cbb0a1/src/common/math.h#L25-L27).
How do we fix this? We have to ensure we use the correct data types everywhere and the correct operators. If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.
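Before applying the fix below, it may help to see the promotion effect in isolation — a minimal sketch, assuming the `fl()`/`dbl()` converters from the **float** package loaded earlier:
```r
# A minimal sketch of the promotion effect, using fl()/dbl() from the float package.
library(float)
x <- 0.123456789
p64 <- 1 / (1 + exp(-x))              # every term is a 64-bit double
p32 <- fl(1) / (fl(1) + exp(-fl(x)))  # every term stays a 32-bit float
dbl(p32) - p64                        # small, but not exactly zero
```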
```{r}
# calculate the predictions casting doubles to floats
bst_from_json_preds <- ifelse(

View File

@ -80,6 +80,24 @@ R package versioning
====================
See :ref:`release`.
Testing R package with different compilers
==========================================
You can change the default compiler used by R by editing the configuration file in your
home directory. For instance, to test XGBoost built with clang++ instead of g++ on
Linux, put the following in your ``~/.R/Makevars`` file:
.. code-block:: sh
CC=clang-15
CXX17=clang++-15
Be aware that the variable name should match the name used by ``R CMD``:
.. code-block:: sh
R CMD config CXX17
Registering native routines in R
================================
According to `R extension manual <https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Registering-native-routines>`_,

View File

@ -50,6 +50,7 @@ inline void EllpackPageSource::Fetch() {
// Silence the warnings about unused variables.
(void)(row_stride_);
(void)(is_dense_);
(void)(device_);
common::AssertGPUSupport();
}
#endif // !defined(XGBOOST_USE_CUDA)

View File

@ -106,14 +106,30 @@ void GBTreeModel::Load(dmlc::Stream* fi) {
Validate(*this);
}
namespace {
std::int32_t IOThreads(Context const* ctx) {
CHECK(ctx);
std::int32_t n_threads = ctx->Threads();
// CRAN checks the number of threads used by examples, but we might not have the
// right thread count when serializing/unserializing models, since nthread is a
// booster parameter that only takes effect after booster initialization.
//
// CRAN's threshold for the ratio of CPU time to elapsed time is 2.5, so we cap the
// number of threads at 2.
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
n_threads = std::min(2, n_threads);
#endif
return n_threads;
}
} // namespace
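On the R side this is why, throughout the tests and vignettes above, the thread count is set explicitly right after a binary model is loaded — a minimal sketch (the model file name is illustrative):
```r
# nthread is a booster parameter and is not restored by xgb.load(),
# so set it explicitly on the freshly loaded booster.
bst <- xgb.load("xgboost.model")
xgb.parameters(bst) <- list(nthread = 2)
```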
void GBTreeModel::SaveModel(Json* p_out) const {
auto& out = *p_out;
CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
out["gbtree_model_param"] = ToJson(param);
std::vector<Json> trees_json(trees.size());
CHECK(ctx_);
common::ParallelFor(trees.size(), ctx_->Threads(), [&](auto t) {
common::ParallelFor(trees.size(), IOThreads(ctx_), [&](auto t) {
auto const& tree = trees[t];
Json jtree{Object{}};
tree->SaveModel(&jtree);
@ -151,9 +167,7 @@ void GBTreeModel::LoadModel(Json const& in) {
CHECK_EQ(tree_info_json.size(), param.num_trees);
tree_info.resize(param.num_trees);
CHECK(ctx_);
common::ParallelFor(param.num_trees, ctx_->Threads(), [&](auto t) {
common::ParallelFor(param.num_trees, IOThreads(ctx_), [&](auto t) {
auto tree_id = get<Integer const>(trees_json[t]["id"]);
trees.at(tree_id).reset(new RegTree{});
trees[tree_id]->LoadModel(trees_json[t]);

View File

@ -3,9 +3,15 @@ import argparse
import os
import shutil
import subprocess
from io import StringIO
from pathlib import Path
from platform import system
try:
import pandas as pd
except ImportError:
pd = None
from test_utils import R_PACKAGE, ROOT, DirectoryExcursion, cd, print_time, record_time
@ -97,16 +103,47 @@ def build_rpackage(path: str) -> str:
return tarball
def check_example_timing(rcheck_dir: Path, threshold: float) -> None:
with open(rcheck_dir / "xgboost-Ex.timings", "r") as fd:
timings = fd.readlines()
newlines = []
for line in timings:
line = line.strip()
newlines.append(line)
con_timings = "\n".join(newlines)
df = pd.read_csv(StringIO(con_timings), delimiter="\t")
ratio_n = "user/elapsed"
df[ratio_n] = df["user"] / df["elapsed"]
offending = df[df[ratio_n] > threshold]
try:
# requires the tabulate package
df.to_markdown("timings.md")
offending.to_markdown("offending.md")
except ImportError:
print("failed to export markdown files.")
pass
if offending.shape[0] == 0:
return
print(offending)
raise ValueError("There are examples using too many threads")
@cd(ROOT)
@record_time
def check_rpackage(path: str) -> None:
env = os.environ.copy()
print("Ncpus:", f"{os.cpu_count()}")
threshold = 2.5
env.update(
{
"MAKEFLAGS": f"-j{os.cpu_count()}",
# cran specific environment variables
"_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(2.5),
"_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
"_R_CHECK_TEST_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
"_R_CHECK_VIGNETTE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
}
)
@ -118,11 +155,14 @@ def check_rpackage(path: str) -> None:
CC = os.path.join(mingw_bin, "gcc.exe")
env.update({"CC": CC, "CXX": CXX})
status = subprocess.run([R, "CMD", "check", "--as-cran", path], env=env)
with open(Path("xgboost.Rcheck") / "00check.log", "r") as fd:
status = subprocess.run(
[R, "CMD", "check", "--as-cran", "--timings", path], env=env
)
rcheck_dir = Path("xgboost.Rcheck")
with open(rcheck_dir / "00check.log", "r") as fd:
check_log = fd.read()
with open(Path("xgboost.Rcheck") / "00install.out", "r") as fd:
with open(rcheck_dir / "00install.out", "r") as fd:
install_log = fd.read()
msg = f"""
@ -144,6 +184,8 @@ def check_rpackage(path: str) -> None:
if check_log.find("Examples with CPU time") != -1:
print(msg)
raise ValueError("Suspicious NOTE.")
if pd is not None:
check_example_timing(rcheck_dir, threshold)
@cd(R_PACKAGE)
@ -264,6 +306,8 @@ def main(args: argparse.Namespace) -> None:
test_with_autotools()
else:
test_with_cmake(args)
elif args.task == "timings":
check_example_timing(Path("xgboost.Rcheck"), 2.5)
else:
raise ValueError("Unexpected task.")
@ -279,7 +323,7 @@ if __name__ == "__main__":
parser.add_argument(
"--task",
type=str,
choices=["pack", "build", "check", "doc"],
choices=["pack", "build", "check", "doc", "timings"],
default="check",
required=False,
)
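For a quick manual look at the same data outside the helper script, the ratio can be computed directly in R — a sketch assuming `R CMD check --timings` has already produced `xgboost.Rcheck/xgboost-Ex.timings` (a tab-separated table with `user`, `system`, and `elapsed` columns):
```r
# Hypothetical manual equivalent of check_example_timing().
timings <- read.delim("xgboost.Rcheck/xgboost-Ex.timings")
timings$ratio <- timings$user / timings$elapsed
subset(timings, ratio > 2.5)  # examples that appear to use too many threads
```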