[R] Set number of threads in demos and tests. (#9591)

- Restrict the number of threads in IO.
- Specify the number of threads in demos and tests.
- Add helper scripts for checks.

parent def77870f3
commit cac2cd2e94
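The same change is applied throughout the hunks below; a minimal standalone
sketch of the recurring pattern (hedged: it reuses the nthread/setDTthreads
idiom and the agaricus data that appear in the diff itself):

    library(xgboost)

    ## Keep the number of threads bounded and explicit
    nthread <- 1
    data.table::setDTthreads(nthread)  # cap data.table's own thread pool too

    data(agaricus.train, package = "xgboost")
    dtrain <- xgb.DMatrix(
      agaricus.train$data, label = agaricus.train$label, nthread = nthread
    )
    bst <- xgb.train(
      params = list(objective = "binary:logistic", nthread = nthread),
      data = dtrain, nrounds = 2
    )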
@@ -4,3 +4,5 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
 README.md
+^doc$
+^Meta$
@@ -557,14 +557,18 @@ cb.cv.predict <- function(save_models = FALSE) {
 #'
 #' @examples
 #' #### Binary classification:
 #' #
+#'
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
 #' # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
 #' # without considering the 2nd order interactions:
 #' x <- model.matrix(Species ~ .^2, iris)[,-1]
 #' colnames(x)
-#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
+#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
 #' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
-#'               lambda = 0.0003, alpha = 0.0003, nthread = 2)
+#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 #' # For 'shotgun', which is a default linear updater, using high eta values may result in
 #' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
 #' # rate does not break the convergence, but allows us to illustrate the typical pattern of
@@ -594,9 +598,9 @@ cb.cv.predict <- function(save_models = FALSE) {
 #'
 #' #### Multiclass classification:
 #' #
-#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
+#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
 #' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
-#'               lambda = 0.0003, alpha = 0.0003, nthread = 1)
+#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 #' # For the default linear updater 'shotgun' it sometimes is helpful
 #' # to use smaller eta to reduce instability
 #' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
@@ -267,11 +267,16 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #'
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
+#'
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
+#'                eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
 #' # use all trees by default
 #' pred <- predict(bst, test$data)
 #' # use only the 1st tree
@@ -337,8 +342,14 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
                                 reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
   object <- xgb.Booster.complete(object, saveraw = FALSE)
 
-  if (!inherits(newdata, "xgb.DMatrix"))
-    newdata <- xgb.DMatrix(newdata, missing = missing, nthread = NVL(object$params[["nthread"]], -1))
+  if (!inherits(newdata, "xgb.DMatrix")) {
+    config <- jsonlite::fromJSON(xgb.config(object))
+    nthread <- strtoi(config$learner$generic_param$nthread)
+    newdata <- xgb.DMatrix(
+      newdata,
+      missing = missing, nthread = NVL(nthread, -1)
+    )
+  }
   if (!is.null(object[["feature_names"]]) &&
       !is.null(colnames(newdata)) &&
       !identical(object[["feature_names"]], colnames(newdata)))
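Note on the hunk above: predict() now reads the thread count from the
booster's stored JSON configuration instead of the R-side params list. A
small sketch of what that lookup yields (illustrative only; assumes the
agaricus data shipped with the package):

    library(xgboost)
    data(agaricus.train, package = "xgboost")
    bst <- xgboost(
      data = agaricus.train$data, label = agaricus.train$label,
      max_depth = 2, eta = 1, nthread = 1, nrounds = 2,
      objective = "binary:logistic", verbose = 0
    )
    config <- jsonlite::fromJSON(xgb.config(bst))
    config$learner$generic_param$nthread
    # "1" -- the JSON dump stores it as a string, hence strtoi() in the diff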
@@ -628,10 +639,15 @@ xgb.attributes <- function(object) {
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
 #' train <- agaricus.train
 #'
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data, label = train$label, max_depth = 2,
+#'   eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+#' )
 #' config <- xgb.config(bst)
 #'
 #' @rdname xgb.config
@@ -18,7 +18,12 @@
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#' dtrain <- with(
+#'   agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+#' )
 #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 #' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@@ -22,14 +22,23 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data, label = train$label, max_depth = 2, eta = 1,
+#'   nthread = nthread,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
+#' )
+#'
 #' xgb.save(bst, 'xgb.model')
 #' bst <- xgb.load('xgb.model')
 #' if (file.exists('xgb.model')) file.remove('xgb.model')
 #' pred <- predict(bst, test$data)
 #' @export
 xgb.load <- function(modelfile) {
   if (is.null(modelfile))
@@ -46,9 +46,12 @@
 #' # Basic use:
 #'
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
 #'
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#'                eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
 #'
 #' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
 #'
@@ -45,10 +45,13 @@
 #' @examples
 #'
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
 #'
-#' # Change max_depth to a higher number to get a more significant result
+#' ## Change max_depth to a higher number to get a more significant result
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
-#'                eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
+#'                eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
 #'                subsample = 0.5, min_child_weight = 2)
 #'
 #' xgb.plot.deepness(bst)
@@ -45,9 +45,14 @@
 #'
 #' @examples
 #' data(agaricus.train)
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
 #'
-#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
-#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
+#'   eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+#' )
 #'
 #' importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
 #'
@@ -43,10 +43,15 @@
 #' @examples
 #'
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
 #'
-#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
-#'                eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
-#'                min_child_weight = 50, verbose = 0)
+#' bst <- xgboost(
+#'   data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
+#'   eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
+#'   min_child_weight = 50, verbose = 0
+#' )
 #'
 #' p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
 #' print(p)
@@ -74,9 +74,14 @@
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
-#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#' nrounds <- 20
+#'
+#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
 #'                eta = 0.1, max_depth = 3, subsample = .5,
-#'                method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
+#'                method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
 #'
 #' xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
 #' contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@@ -85,12 +90,11 @@
 #'
 #' # multiclass example - plots for each class separately:
 #' nclass <- 3
-#' nrounds <- 20
 #' x <- as.matrix(iris[, -5])
 #' set.seed(123)
 #' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
 #' mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
-#'                 max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
+#'                 max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
 #'                 objective = "multi:softprob", num_class = nclass, verbose = 0)
 #' trees0 <- seq(from=0, by=nclass, length.out=nrounds)
 #' col <- rgb(0, 0, 1, 0.5)
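Aside on the unchanged trees0 line above: in a multiclass booster the trees
are interleaved by class, so every nclass-th tree index belongs to the first
class. A quick way to see the indices (same nclass/nrounds as the example):

    nclass <- 3
    nrounds <- 20
    trees0 <- seq(from = 0, by = nclass, length.out = nrounds)
    head(trees0)  # 0 3 6 9 12 15 -- tree indices for the first class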
@@ -25,14 +25,22 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data, label = train$label, max_depth = 2, eta = 1,
+#'   nthread = nthread,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
+#' )
 #' xgb.save(bst, 'xgb.model')
 #' bst <- xgb.load('xgb.model')
 #' if (file.exists('xgb.model')) file.remove('xgb.model')
 #' pred <- predict(bst, test$data)
 #' @export
 xgb.save <- function(model, fname) {
   if (typeof(fname) != "character")
@@ -16,13 +16,18 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#'                eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
 #'
 #' raw <- xgb.save.raw(bst)
 #' bst <- xgb.load.raw(raw)
 #' pred <- predict(bst, test$data)
 #'
 #' @export
 xgb.save.raw <- function(model, raw_format = "deprecated") {
@@ -168,7 +168,8 @@
 #' than the \code{xgboost} interface.
 #'
 #' Parallelization is automatically enabled if \code{OpenMP} is present.
-#' Number of threads can also be manually specified via \code{nthread} parameter.
+#' Number of threads can also be manually specified via the \code{nthread}
+#' parameter.
 #'
 #' The evaluation metric is chosen automatically by XGBoost (according to the objective)
 #' when the \code{eval_metric} parameter is not provided.
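For reference, nthread can be given either as a top-level argument or inside
the params list; the NVL(nthread, -1) fallback seen earlier suggests that a
negative value lets XGBoost fall back to its OpenMP default. A minimal,
hedged sketch:

    library(xgboost)
    data(agaricus.train, package = "xgboost")
    dtrain <- xgb.DMatrix(
      agaricus.train$data, label = agaricus.train$label, nthread = 2
    )
    param <- list(objective = "binary:logistic", nthread = 2)
    bst <- xgb.train(param, dtrain, nrounds = 2)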
@@ -237,17 +238,25 @@
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
+#' dtrain <- with(
+#'   agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+#' )
+#' dtest <- with(
+#'   agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
+#' )
 #' watchlist <- list(train = dtrain, eval = dtest)
 #'
 #' ## A simple xgb.train example:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
 #'               objective = "binary:logistic", eval_metric = "auc")
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 #'
 #'
-#' ## An xgb.train example where custom objective and evaluation metric are used:
+#' ## An xgb.train example where custom objective and evaluation metric are
+#' ## used:
 #' logregobj <- function(preds, dtrain) {
 #'   labels <- getinfo(dtrain, "label")
 #'   preds <- 1/(1 + exp(-preds))
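The custom objective above is cut off by the hunk boundary; the standard
logistic objective from the XGBoost demos continues along these lines (a
sketch, not the verbatim file contents):

    logregobj <- function(preds, dtrain) {
      labels <- getinfo(dtrain, "label")
      preds <- 1 / (1 + exp(-preds))  # margin -> probability
      grad <- preds - labels          # first-order gradient
      hess <- preds * (1 - preds)     # second-order gradient
      list(grad = grad, hess = hess)
    }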
@@ -263,12 +272,12 @@
 #'
 #' # These functions could be used by passing them either:
 #' # as 'objective' and 'eval_metric' parameters in the params list:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
 #'               objective = logregobj, eval_metric = evalerror)
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 #'
 #' # or through the ... arguments:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
 #'                  objective = logregobj, eval_metric = evalerror)
 #'
@@ -278,7 +287,7 @@
 #'
 #'
 #' ## An xgb.train example of using variable learning rates at each iteration:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
 #'               objective = "binary:logistic", eval_metric = "auc")
 #' my_etas <- list(eta = c(0.5, 0.1))
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@@ -290,7 +299,7 @@
 #'
 #' ## An 'xgboost' interface example:
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
-#'                max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
+#'                max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
 #'                objective = "binary:logistic")
 #' pred <- predict(bst, agaricus.test$data)
 #'
@@ -35,14 +35,18 @@ Callback function expects the following values to be set in its calling frame:
 }
 \examples{
 #### Binary classification:
 #
+
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
 # without considering the 2nd order interactions:
 x <- model.matrix(Species ~ .^2, iris)[,-1]
 colnames(x)
-dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
+dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
 param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
-              lambda = 0.0003, alpha = 0.0003, nthread = 2)
+              lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 # For 'shotgun', which is a default linear updater, using high eta values may result in
 # unstable behaviour in some datasets. With this simple dataset, however, the high learning
 # rate does not break the convergence, but allows us to illustrate the typical pattern of
@@ -72,9 +76,9 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
 
 #### Multiclass classification:
 #
-dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
+dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
 param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
-              lambda = 0.0003, alpha = 0.0003, nthread = 1)
+              lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 # For the default linear updater 'shotgun' it sometimes is helpful
 # to use smaller eta to reduce instability
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
@@ -132,11 +132,16 @@ Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple th
 
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
+
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
 
 bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
+               eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
 # use all trees by default
 pred <- predict(bst, test$data)
 # use only the 1st tree
@@ -38,7 +38,12 @@ Supported input file formats are either a LIBSVM text file or a binary file that
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+dtrain <- with(
+  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+)
 xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@@ -19,10 +19,15 @@ Accessors for model parameters as JSON string.
 }
 \examples{
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
 train <- agaricus.train
 
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2,
+  eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+)
 config <- xgb.config(bst)
 
 }
@@ -27,14 +27,23 @@ not \code{xgb.load}.
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2, eta = 1,
+  nthread = nthread,
+  nrounds = 2,
+  objective = "binary:logistic"
+)
+
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 if (file.exists('xgb.model')) file.remove('xgb.model')
 pred <- predict(bst, test$data)
 }
 \seealso{
 \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
@@ -66,9 +66,12 @@ Parse a boosted tree model text dump into a \code{data.table} structure.
 # Basic use:
 
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
 
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+               eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
 
 (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
 
@@ -61,10 +61,13 @@ This function was inspired by the blog post
 \examples{
 
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
 
-# Change max_depth to a higher number to get a more significant result
+## Change max_depth to a higher number to get a more significant result
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
-               eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
+               eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
                subsample = 0.5, min_child_weight = 2)
 
 xgb.plot.deepness(bst)
@@ -77,9 +77,14 @@ with bar colors corresponding to different clusters that have somewhat similar i
 }
 \examples{
 data(agaricus.train)
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
 
-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
-               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+bst <- xgboost(
+  data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
+  eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+)
 
 importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
 
@@ -63,10 +63,15 @@ This function is inspired by this blog post:
 \examples{
 
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
 
-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
-               eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
-               min_child_weight = 50, verbose = 0)
+bst <- xgboost(
+  data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
+  eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
+  min_child_weight = 50, verbose = 0
+)
 
 p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
 print(p)
@@ -124,9 +124,14 @@ a meaningful thing to do.
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
-bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+nrounds <- 20
+
+bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
                eta = 0.1, max_depth = 3, subsample = .5,
-               method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
+               method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
 
 xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
 contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@@ -135,12 +140,11 @@ xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12) # S
 
 # multiclass example - plots for each class separately:
 nclass <- 3
-nrounds <- 20
 x <- as.matrix(iris[, -5])
 set.seed(123)
 is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
 mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
-                max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
+                max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
                 objective = "multi:softprob", num_class = nclass, verbose = 0)
 trees0 <- seq(from=0, by=nclass, length.out=nrounds)
 col <- rgb(0, 0, 1, 0.5)
@@ -31,14 +31,22 @@ releases of XGBoost.
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2, eta = 1,
+  nthread = nthread,
+  nrounds = 2,
+  objective = "binary:logistic"
+)
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 if (file.exists('xgb.model')) file.remove('xgb.model')
 pred <- predict(bst, test$data)
 }
 \seealso{
 \code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.
@@ -25,12 +25,17 @@ Save xgboost model from xgboost or xgb.train
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+               eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
 
 raw <- xgb.save.raw(bst)
 bst <- xgb.load.raw(raw)
 pred <- predict(bst, test$data)
 
 }
@@ -250,7 +250,8 @@ customized objective and evaluation metric functions, therefore it is more flexi
 than the \code{xgboost} interface.
 
 Parallelization is automatically enabled if \code{OpenMP} is present.
-Number of threads can also be manually specified via \code{nthread} parameter.
+Number of threads can also be manually specified via the \code{nthread}
+parameter.
 
 The evaluation metric is chosen automatically by XGBoost (according to the objective)
 when the \code{eval_metric} parameter is not provided.
@@ -286,17 +287,25 @@ The following callbacks are automatically created when certain parameters are se
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
+dtrain <- with(
+  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+)
+dtest <- with(
+  agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
+)
 watchlist <- list(train = dtrain, eval = dtest)
 
 ## A simple xgb.train example:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
               objective = "binary:logistic", eval_metric = "auc")
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 
 
-## An xgb.train example where custom objective and evaluation metric are used:
+## An xgb.train example where custom objective and evaluation metric are
+## used:
 logregobj <- function(preds, dtrain) {
   labels <- getinfo(dtrain, "label")
   preds <- 1/(1 + exp(-preds))
@@ -312,12 +321,12 @@ evalerror <- function(preds, dtrain) {
 
 # These functions could be used by passing them either:
 # as 'objective' and 'eval_metric' parameters in the params list:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
               objective = logregobj, eval_metric = evalerror)
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 
 # or through the ... arguments:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
                  objective = logregobj, eval_metric = evalerror)
 
@@ -327,7 +336,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
 
 
 ## An xgb.train example of using variable learning rates at each iteration:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
               objective = "binary:logistic", eval_metric = "auc")
 my_etas <- list(eta = c(0.5, 0.1))
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@@ -339,7 +348,7 @@ bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
 
 ## An 'xgboost' interface example:
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
-               max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
+               max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
                objective = "binary:logistic")
 pred <- predict(bst, agaricus.test$data)
 
R-package/tests/helper_scripts/run-examples.R (new file, 25 lines)
@@ -0,0 +1,25 @@
+## Helper script for running individual examples.
+library(pkgload)
+library(xgboost)
+
+files <- list.files("./man")
+
+
+run_example_timeit <- function(f) {
+  path <- paste("./man/", f, sep = "")
+  print(paste("Test", f))
+  flush.console()
+  t0 <- proc.time()
+  run_example(path)
+  t1 <- proc.time()
+  list(file = f, time = t1 - t0)
+}
+
+timings <- lapply(files, run_example_timeit)
+
+for (t in timings) {
+  ratio <- t$time[1] / t$time[3]
+  if (!is.na(ratio) && !is.infinite(ratio) && ratio >= 2.5) {
+    print(paste("Offending example:", t$file, ratio))
+  }
+}
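Why the ratio test in this script works: proc.time() reports user, system
and elapsed seconds, so user/elapsed approximates the average number of busy
threads while an example ran; values of 2.5 or more flag examples that did
not respect the thread cap. An illustrative snippet:

    t0 <- proc.time()
    x <- sum(sort(rnorm(1e7)))  # single-threaded work
    dt <- proc.time() - t0
    dt[["user.self"]] / dt[["elapsed"]]  # close to 1 for single-threaded code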
@@ -1,23 +1,28 @@
 context("basic functions")
 
-data(agaricus.train, package = 'xgboost')
-data(agaricus.test, package = 'xgboost')
+data(agaricus.train, package = "xgboost")
+data(agaricus.test, package = "xgboost")
 train <- agaricus.train
 test <- agaricus.test
 set.seed(1994)
 
 # disable some tests for Win32
 windows_flag <- .Platform$OS.type == "windows" &&
-                .Machine$sizeof.pointer != 8
-solaris_flag <- (Sys.info()['sysname'] == "SunOS")
+  .Machine$sizeof.pointer != 8
+solaris_flag <- (Sys.info()["sysname"] == "SunOS")
+n_threads <- 1
+
 
 test_that("train and predict binary classification", {
   nrounds <- 2
   expect_output(
-    bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-                   eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic",
-                   eval_metric = "error")
-    , "train-error")
+    bst <- xgboost(
+      data = train$data, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = nrounds,
+      objective = "binary:logistic", eval_metric = "error"
+    ),
+    "train-error"
+  )
   expect_equal(class(bst), "xgb.Booster")
   expect_equal(bst$niter, nrounds)
   expect_false(is.null(bst$evaluation_log))
@@ -46,26 +51,39 @@ test_that("parameter validation works", {
   d <- cbind(
     x1 = rnorm(10),
     x2 = rnorm(10),
-    x3 = rnorm(10))
+    x3 = rnorm(10)
+  )
   y <- d[, "x1"] + d[, "x2"]^2 +
     ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
     rnorm(10)
-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
 
   correct <- function() {
-    params <- list(max_depth = 2, booster = "dart",
-                   rate_drop = 0.5, one_drop = TRUE,
-                   objective = "reg:squarederror")
+    params <- list(
+      max_depth = 2,
+      booster = "dart",
+      rate_drop = 0.5,
+      one_drop = TRUE,
+      nthread = n_threads,
+      objective = "reg:squarederror"
+    )
     xgb.train(params = params, data = dtrain, nrounds = nrounds)
   }
   expect_silent(correct())
   incorrect <- function() {
-    params <- list(max_depth = 2, booster = "dart",
-                   rate_drop = 0.5, one_drop = TRUE,
-                   objective = "reg:squarederror",
-                   foo = "bar", bar = "foo")
+    params <- list(
+      max_depth = 2,
+      booster = "dart",
+      rate_drop = 0.5,
+      one_drop = TRUE,
+      objective = "reg:squarederror",
+      nthread = n_threads,
+      foo = "bar",
+      bar = "foo"
+    )
     output <- capture.output(
-      xgb.train(params = params, data = dtrain, nrounds = nrounds))
+      xgb.train(params = params, data = dtrain, nrounds = nrounds)
+    )
     print(output)
   }
   expect_output(incorrect(), '\\\\"bar\\\\", \\\\"foo\\\\"')
@@ -79,7 +97,8 @@ test_that("dart prediction works", {
   d <- cbind(
     x1 = rnorm(100),
     x2 = rnorm(100),
-    x3 = rnorm(100))
+    x3 = rnorm(100)
+  )
   y <- d[, "x1"] + d[, "x2"]^2 +
     ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
     rnorm(100)
@@ -93,7 +112,7 @@ test_that("dart prediction works", {
     rate_drop = 0.5,
     one_drop = TRUE,
     eta = 1,
-    nthread = 2,
+    nthread = n_threads,
     nrounds = nrounds,
     objective = "reg:squarederror"
   )
@@ -105,7 +124,7 @@ test_that("dart prediction works", {
   expect_false(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))
 
   set.seed(1994)
-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
   booster_by_train <- xgb.train(
     params = list(
       booster = "dart",
@@ -113,7 +132,7 @@ test_that("dart prediction works", {
       eta = 1,
       rate_drop = 0.5,
       one_drop = TRUE,
-      nthread = 1,
+      nthread = n_threads,
       objective = "reg:squarederror"
     ),
     data = dtrain,
@@ -132,10 +151,13 @@ test_that("train and predict softprob", {
   lb <- as.numeric(iris$Species) - 1
   set.seed(11)
   expect_output(
-    bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-                   max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
-                   objective = "multi:softprob", num_class = 3, eval_metric = "merror")
-    , "train-merror")
+    bst <- xgboost(
+      data = as.matrix(iris[, -5]), label = lb,
+      max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
+      objective = "multi:softprob", num_class = 3, eval_metric = "merror"
+    ),
+    "train-merror"
+  )
   expect_false(is.null(bst$evaluation_log))
   expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
   expect_equal(bst$niter * 3, xgb.ntree(bst))
@@ -164,9 +186,10 @@ test_that("train and predict softprob", {
     x3 = rnorm(100)
   )
   y <- sample.int(10, 100, replace = TRUE) - 1
-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
   booster <- xgb.train(
-    params = list(tree_method = "hist"), data = dtrain, nrounds = 4, num_class = 10,
+    params = list(tree_method = "hist", nthread = n_threads),
+    data = dtrain, nrounds = 4, num_class = 10,
     objective = "multi:softprob"
   )
   predt <- predict(booster, as.matrix(d), reshape = TRUE, strict_shape = FALSE)
@@ -178,10 +201,13 @@ test_that("train and predict softmax", {
   lb <- as.numeric(iris$Species) - 1
   set.seed(11)
   expect_output(
-    bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-                   max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
-                   objective = "multi:softmax", num_class = 3, eval_metric = "merror")
-    , "train-merror")
+    bst <- xgboost(
+      data = as.matrix(iris[, -5]), label = lb,
+      max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
+      objective = "multi:softmax", num_class = 3, eval_metric = "merror"
+    ),
+    "train-merror"
+  )
   expect_false(is.null(bst$evaluation_log))
   expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
   expect_equal(bst$niter * 3, xgb.ntree(bst))
@@ -196,16 +222,19 @@ test_that("train and predict RF", {
   set.seed(11)
   lb <- train$label
   # single iteration
-  bst <- xgboost(data = train$data, label = lb, max_depth = 5,
-                 nthread = 2, nrounds = 1, objective = "binary:logistic", eval_metric = "error",
-                 num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
+  bst <- xgboost(
+    data = train$data, label = lb, max_depth = 5,
+    nthread = n_threads,
+    nrounds = 1, objective = "binary:logistic", eval_metric = "error",
+    num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1
+  )
   expect_equal(bst$niter, 1)
   expect_equal(xgb.ntree(bst), 20)
 
   pred <- predict(bst, train$data)
   pred_err <- sum((pred > 0.5) != lb) / length(lb)
   expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
-  #expect_lt(pred_err, 0.03)
+  # expect_lt(pred_err, 0.03)
 
   pred <- predict(bst, train$data, ntreelimit = 20)
   pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
@@ -219,11 +248,13 @@ test_that("train and predict RF with softprob", {
   lb <- as.numeric(iris$Species) - 1
   nrounds <- 15
   set.seed(11)
-  bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-                 max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds,
-                 objective = "multi:softprob", eval_metric = "merror",
-                 num_class = 3, verbose = 0,
-                 num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5)
+  bst <- xgboost(
+    data = as.matrix(iris[, -5]), label = lb,
+    max_depth = 3, eta = 0.9, nthread = n_threads, nrounds = nrounds,
+    objective = "multi:softprob", eval_metric = "merror",
+    num_class = 3, verbose = 0,
+    num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5
+  )
   expect_equal(bst$niter, 15)
   expect_equal(xgb.ntree(bst), 15 * 3 * 4)
   # predict for all iterations:
@@ -240,18 +271,24 @@ test_that("train and predict RF with softprob", {
 
 test_that("use of multiple eval metrics works", {
   expect_output(
-    bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-                   eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
-                   eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
-    , "train-error.*train-auc.*train-logloss")
+    bst <- xgboost(
+      data = train$data, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+      eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
+    ),
+    "train-error.*train-auc.*train-logloss"
+  )
   expect_false(is.null(bst$evaluation_log))
   expect_equal(dim(bst$evaluation_log), c(2, 4))
   expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
   expect_output(
-    bst2 <- xgboost(data = train$data, label = train$label, max_depth = 2,
-                    eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
-                    eval_metric = list("error", "auc", "logloss"))
-    , "train-error.*train-auc.*train-logloss")
+    bst2 <- xgboost(
+      data = train$data, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+      eval_metric = list("error", "auc", "logloss")
+    ),
+    "train-error.*train-auc.*train-logloss"
+  )
   expect_false(is.null(bst2$evaluation_log))
   expect_equal(dim(bst2$evaluation_log), c(2, 4))
   expect_equal(colnames(bst2$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
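As this test demonstrates, several evaluation metrics can be requested
either by repeating eval_metric or by passing a single list; a condensed
standalone version of the same idea (illustrative sketch):

    library(xgboost)
    data(agaricus.train, package = "xgboost")
    bst <- xgboost(
      data = agaricus.train$data, label = agaricus.train$label,
      max_depth = 2, eta = 1, nthread = 1, nrounds = 2,
      objective = "binary:logistic",
      eval_metric = list("error", "auc", "logloss")  # same as three repeated args
    )
    colnames(bst$evaluation_log)  # "iter" "train_error" "train_auc" "train_logloss"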
@@ -259,9 +296,11 @@ test_that("use of multiple eval metrics works", {
 
 
 test_that("training continuation works", {
-  dtrain <- xgb.DMatrix(train$data, label = train$label)
+  dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
   watchlist <- list(train = dtrain)
-  param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
+  param <- list(
+    objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
+  )
 
   # for the reference, use 4 iterations at once:
   set.seed(11)
@@ -271,30 +310,33 @@ test_that("training continuation works", {
   bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
   # continue for two more:
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
-  if (!windows_flag && !solaris_flag)
+  if (!windows_flag && !solaris_flag) {
     expect_equal(bst$raw, bst2$raw)
+  }
   expect_false(is.null(bst2$evaluation_log))
   expect_equal(dim(bst2$evaluation_log), c(4, 2))
   expect_equal(bst2$evaluation_log, bst$evaluation_log)
   # test continuing from raw model data
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
-  if (!windows_flag && !solaris_flag)
+  if (!windows_flag && !solaris_flag) {
     expect_equal(bst$raw, bst2$raw)
+  }
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
   # test continuing from a model in file
   xgb.save(bst1, "xgboost.json")
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.json")
-  if (!windows_flag && !solaris_flag)
+  if (!windows_flag && !solaris_flag) {
     expect_equal(bst$raw, bst2$raw)
+  }
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
   file.remove("xgboost.json")
 })
 
 test_that("model serialization works", {
   out_path <- "model_serialization"
-  dtrain <- xgb.DMatrix(train$data, label = train$label)
+  dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
   watchlist <- list(train = dtrain)
-  param <- list(objective = "binary:logistic")
+  param <- list(objective = "binary:logistic", nthread = n_threads)
   booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
   raw <- xgb.serialize(booster)
   saveRDS(raw, out_path)
@@ -309,11 +351,14 @@ test_that("model serialization works", {
 test_that("xgb.cv works", {
   set.seed(11)
   expect_output(
-    cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5,
-                 eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
-                 eval_metric = "error", verbose = TRUE)
-    , "train-error:")
-  expect_is(cv, 'xgb.cv.synchronous')
+    cv <- xgb.cv(
+      data = train$data, label = train$label, max_depth = 2, nfold = 5,
+      eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+      eval_metric = "error", verbose = TRUE
+    ),
+    "train-error:"
+  )
+  expect_is(cv, "xgb.cv.synchronous")
   expect_false(is.null(cv$evaluation_log))
   expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)
   expect_lt(cv$evaluation_log[, min(test_error_std)], 0.008)
@@ -326,15 +371,19 @@ test_that("xgb.cv works", {
 })
 
 test_that("xgb.cv works with stratified folds", {
-  dtrain <- xgb.DMatrix(train$data, label = train$label)
+  dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
   set.seed(314159)
-  cv <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
-               eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
-               verbose = TRUE, stratified = FALSE)
+  cv <- xgb.cv(
+    data = dtrain, max_depth = 2, nfold = 5,
+    eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+    verbose = TRUE, stratified = FALSE
+  )
   set.seed(314159)
-  cv2 <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
-                eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
-                verbose = TRUE, stratified = TRUE)
+  cv2 <- xgb.cv(
+    data = dtrain, max_depth = 2, nfold = 5,
+    eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+    verbose = TRUE, stratified = TRUE
+  )
   # Stratified folds should result in a different evaluation logs
   expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
 })
@@ -342,40 +391,57 @@ test_that("xgb.cv works with stratified folds", {
 test_that("train and predict with non-strict classes", {
   # standard dense matrix input
   train_dense <- as.matrix(train$data)
-  bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
-                 eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
+  bst <- xgboost(
+    data = train_dense, label = train$label, max_depth = 2,
+    eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+    verbose = 0
+  )
   pr0 <- predict(bst, train_dense)
 
   # dense matrix-like input of non-matrix class
-  class(train_dense) <- 'shmatrix'
+  class(train_dense) <- "shmatrix"
   expect_true(is.matrix(train_dense))
   expect_error(
-    bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
-                   eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
-    , regexp = NA)
+    bst <- xgboost(
+      data = train_dense, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+      verbose = 0
+    ),
+    regexp = NA
+  )
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)
 
   # dense matrix-like input of non-matrix class with some inheritance
-  class(train_dense) <- c('pphmatrix', 'shmatrix')
+  class(train_dense) <- c("pphmatrix", "shmatrix")
   expect_true(is.matrix(train_dense))
   expect_error(
-    bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
-                   eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
-    , regexp = NA)
+    bst <- xgboost(
+      data = train_dense, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+      verbose = 0
+    ),
+    regexp = NA
+  )
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)
 
   # when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster
-  class(bst) <- c('super.Booster', 'xgb.Booster')
+  class(bst) <- c("super.Booster", "xgb.Booster")
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)
 })
 
 test_that("max_delta_step works", {
-  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+  dtrain <- xgb.DMatrix(
+    agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+  )
   watchlist <- list(train = dtrain)
-  param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
+  param <- list(
+    objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
+    nthread = n_threads,
+    eta = 0.5
+  )
   nrounds <- 5
   # model with no restriction on max_delta_step
   bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
@@ -395,14 +461,16 @@ test_that("colsample_bytree works", {
   test_y <- as.numeric(rowSums(test_x) > 0)
   colnames(train_x) <- paste0("Feature_", sprintf("%03d", 1:100))
   colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
-  dtrain <- xgb.DMatrix(train_x, label = train_y)
-  dtest <- xgb.DMatrix(test_x, label = test_y)
+  dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
+  dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
   watchlist <- list(train = dtrain, eval = dtest)
   ## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
   ## each tree
-  param <- list(max_depth = 2, eta = 0, nthread = 2,
-                colsample_bytree = 0.01, objective = "binary:logistic",
-                eval_metric = "auc")
+  param <- list(
+    max_depth = 2, eta = 0, nthread = n_threads,
+    colsample_bytree = 0.01, objective = "binary:logistic",
+    eval_metric = "auc"
+  )
   set.seed(2)
   bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
   xgb.importance(model = bst)
@@ -412,9 +480,11 @@ test_that("colsample_bytree works", {
 })
 
 test_that("Configuration works", {
-  bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-                 eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
-                 eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
+  bst <- xgboost(
+    data = train$data, label = train$label, max_depth = 2,
+    eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+    eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
+  )
   config <- xgb.config(bst)
   xgb.config(bst) <- config
   reloaded_config <- xgb.config(bst)
@@ -451,22 +521,26 @@ test_that("strict_shape works", {
   y <- as.numeric(iris$Species) - 1
   X <- as.matrix(iris[, -5])
 
-  bst <- xgboost(data = X, label = y,
-                 max_depth = 2, nrounds = n_rounds,
-                 objective = "multi:softprob", num_class = 3, eval_metric = "merror")
+  bst <- xgboost(
+    data = X, label = y,
+    max_depth = 2, nrounds = n_rounds, nthread = n_threads,
+    objective = "multi:softprob", num_class = 3, eval_metric = "merror"
+  )
 
   test_strict_shape(bst, X, 3)
 }
 
 
 test_agaricus <- function() {
-  data(agaricus.train, package = 'xgboost')
+  data(agaricus.train, package = "xgboost")
   X <- agaricus.train$data
   y <- agaricus.train$label
 
-  bst <- xgboost(data = X, label = y, max_depth = 2,
-                 nrounds = n_rounds, objective = "binary:logistic",
-                 eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
+  bst <- xgboost(
+    data = X, label = y, max_depth = 2, nthread = n_threads,
+    nrounds = n_rounds, objective = "binary:logistic",
+    eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
+  )
 
   test_strict_shape(bst, X, 1)
 }
@@ -481,8 +555,10 @@ test_that("'predict' accepts CSR data", {
   x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix")
   x_csr <- as(x_csc, "RsparseMatrix")
   x_spv <- as(x_csc, "sparseVector")
-  bst <- xgboost(data = X, label = y, objective = "binary:logistic",
-                 nrounds = 5L, verbose = FALSE)
+  bst <- xgboost(
+    data = X, label = y, objective = "binary:logistic",
+    nrounds = 5L, verbose = FALSE, nthread = n_threads,
+  )
   p_csc <- predict(bst, x_csc)
   p_csr <- predict(bst, x_csr)
   p_spv <- predict(bst, x_spv)
@@ -6,6 +6,8 @@ data(agaricus.test, package = 'xgboost')
 train <- agaricus.train
 test <- agaricus.test
 
+n_threads <- 2
+
 # add some label noise for early stopping tests
 add.noise <- function(label, frac) {
   inoise <- sample(length(label), length(label) * frac)
@@ -15,15 +17,15 @@ add.noise <- function(label, frac) {
 set.seed(11)
 ltrain <- add.noise(train$label, 0.2)
 ltest <- add.noise(test$label, 0.2)
-dtrain <- xgb.DMatrix(train$data, label = ltrain)
-dtest <- xgb.DMatrix(test$data, label = ltest)
+dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
+dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
 watchlist <- list(train = dtrain, test = dtest)
 
 
 err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
 
 param <- list(objective = "binary:logistic", eval_metric = "error",
-              max_depth = 2, nthread = 2)
+              max_depth = 2, nthread = n_threads)
 
 
 test_that("cb.print.evaluation works as expected", {
@@ -103,7 +105,7 @@ test_that("cb.evaluation.log works as expected", {
 
 
 param <- list(objective = "binary:logistic", eval_metric = "error",
-              max_depth = 4, nthread = 2)
+              max_depth = 4, nthread = n_threads)
 
 test_that("can store evaluation_log without printing", {
   expect_silent(
@@ -179,8 +181,10 @@ test_that("cb.save.model works as expected", {
   expect_true(file.exists('xgboost_01.json'))
   expect_true(file.exists('xgboost_02.json'))
   b1 <- xgb.load('xgboost_01.json')
+  xgb.parameters(b1) <- list(nthread = 2)
   expect_equal(xgb.ntree(b1), 1)
   b2 <- xgb.load('xgboost_02.json')
+  xgb.parameters(b2) <- list(nthread = 2)
   expect_equal(xgb.ntree(b2), 2)
 
   xgb.config(b2) <- xgb.config(bst)
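On the two added lines above: xgb.parameters<- is the R package's setter for
parameters on an existing booster, used here to pin the thread count on
freshly loaded models. Illustrative sketch (file name as in the test):

    b1 <- xgb.load("xgboost_01.json")        # assumes the file from the test exists
    xgb.parameters(b1) <- list(nthread = 2)  # set runtime threads on the loaded booster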
@@ -267,7 +271,8 @@ test_that("early stopping works with titanic", {
     objective = "binary:logistic",
     eval_metric = "auc",
     nrounds = 100,
-    early_stopping_rounds = 3
+    early_stopping_rounds = 3,
+    nthread = n_threads
   )
 
   expect_true(TRUE) # should not crash
@@ -308,7 +313,7 @@ test_that("prediction in xgb.cv works", {
 
 test_that("prediction in xgb.cv works for gblinear too", {
   set.seed(11)
-  p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = 2)
+  p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads)
   cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0)
   expect_false(is.null(cv$evaluation_log))
   expect_false(is.null(cv$pred))
@@ -341,7 +346,7 @@ test_that("prediction in xgb.cv for softprob works", {
   set.seed(11)
   expect_warning(
     cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4,
-                 eta = 0.5, nrounds = 5, max_depth = 3, nthread = 2,
+                 eta = 0.5, nrounds = 5, max_depth = 3, nthread = n_threads,
                  subsample = 0.8, gamma = 2, verbose = 0,
                  prediction = TRUE, objective = "multi:softprob", num_class = 3)
     , NA)
@@ -2,10 +2,16 @@ context('Test models with custom objective')
 
 set.seed(1994)
 
+n_threads <- 2
+
 data(agaricus.train, package = 'xgboost')
 data(agaricus.test, package = 'xgboost')
-dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+dtrain <- xgb.DMatrix(
+  agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+)
+dtest <- xgb.DMatrix(
+  agaricus.test$data, label = agaricus.test$label, nthread = n_threads
+)
 watchlist <- list(eval = dtest, train = dtrain)
 
 logregobj <- function(preds, dtrain) {
@ -22,7 +28,7 @@ evalerror <- function(preds, dtrain) {
|
||||
return(list(metric = "error", value = err))
|
||||
}
|
||||
|
||||
param <- list(max_depth = 2, eta = 1, nthread = 2,
|
||||
param <- list(max_depth = 2, eta = 1, nthread = n_threads,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
num_round <- 2
|
||||
|
||||
@ -67,7 +73,7 @@ test_that("custom objective using DMatrix attr works", {
|
||||
test_that("custom objective with multi-class shape", {
|
||||
data <- as.matrix(iris[, -5])
|
||||
label <- as.numeric(iris$Species) - 1
|
||||
dtrain <- xgb.DMatrix(data = data, label = label)
|
||||
dtrain <- xgb.DMatrix(data = data, label = label, nthread = n_threads)
|
||||
n_classes <- 3
|
||||
|
||||
fake_softprob <- function(preds, dtrain) {
|
||||
|
||||
@ -5,19 +5,21 @@ data(agaricus.test, package = "xgboost")
|
||||
test_data <- agaricus.test$data[1:100, ]
|
||||
test_label <- agaricus.test$label[1:100]
|
||||
|
||||
n_threads <- 2
|
||||
|
||||
test_that("xgb.DMatrix: basic construction", {
|
||||
# from sparse matrix
|
||||
dtest1 <- xgb.DMatrix(test_data, label = test_label)
|
||||
dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
|
||||
|
||||
# from dense matrix
|
||||
dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label)
|
||||
dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label, nthread = n_threads)
|
||||
expect_equal(getinfo(dtest1, "label"), getinfo(dtest2, "label"))
|
||||
expect_equal(dim(dtest1), dim(dtest2))
|
||||
|
||||
# from dense integer matrix
|
||||
int_data <- as.matrix(test_data)
|
||||
storage.mode(int_data) <- "integer"
|
||||
dtest3 <- xgb.DMatrix(int_data, label = test_label)
|
||||
dtest3 <- xgb.DMatrix(int_data, label = test_label, nthread = n_threads)
|
||||
expect_equal(dim(dtest1), dim(dtest3))
|
||||
|
||||
n_samples <- 100
|
||||
@ -29,15 +31,15 @@ test_that("xgb.DMatrix: basic construction", {
|
||||
X <- matrix(X, nrow = n_samples)
|
||||
y <- rbinom(n = n_samples, size = 1, prob = 1 / 2)
|
||||
|
||||
fd <- xgb.DMatrix(X, label = y, missing = 1)
|
||||
fd <- xgb.DMatrix(X, label = y, missing = 1, nthread = n_threads)
|
||||
|
||||
dgc <- as(X, "dgCMatrix")
|
||||
fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0)
|
||||
fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0, nthread = n_threads)
|
||||
|
||||
dgr <- as(X, "dgRMatrix")
|
||||
fdgr <- xgb.DMatrix(dgr, label = y, missing = 1)
|
||||
fdgr <- xgb.DMatrix(dgr, label = y, missing = 1, nthread = n_threads)
|
||||
|
||||
params <- list(tree_method = "hist")
|
||||
params <- list(tree_method = "hist", nthread = n_threads)
|
||||
bst_fd <- xgb.train(
|
||||
params, nrounds = 8, fd, watchlist = list(train = fd)
|
||||
)
|
||||
@ -64,12 +66,12 @@ test_that("xgb.DMatrix: NA", {
|
||||
)
|
||||
x[1, "x1"] <- NA
|
||||
|
||||
m <- xgb.DMatrix(x)
|
||||
m <- xgb.DMatrix(x, nthread = n_threads)
|
||||
xgb.DMatrix.save(m, "int.dmatrix")
|
||||
|
||||
x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
|
||||
colnames(x) <- c("x1", "x2")
|
||||
m <- xgb.DMatrix(x)
|
||||
m <- xgb.DMatrix(x, nthread = n_threads)
|
||||
|
||||
xgb.DMatrix.save(m, "float.dmatrix")
|
||||
|
||||
@ -94,7 +96,7 @@ test_that("xgb.DMatrix: NA", {
|
||||
|
||||
test_that("xgb.DMatrix: saving, loading", {
|
||||
# save to a local file
|
||||
dtest1 <- xgb.DMatrix(test_data, label = test_label)
|
||||
dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
|
||||
tmp_file <- tempfile('xgb.DMatrix_')
|
||||
on.exit(unlink(tmp_file))
|
||||
expect_true(xgb.DMatrix.save(dtest1, tmp_file))
|
||||
@ -109,13 +111,17 @@ test_that("xgb.DMatrix: saving, loading", {
|
||||
tmp_file <- tempfile(fileext = ".libsvm")
|
||||
writeLines(tmp, tmp_file)
|
||||
expect_true(file.exists(tmp_file))
|
||||
dtest4 <- xgb.DMatrix(paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE)
|
||||
dtest4 <- xgb.DMatrix(
|
||||
paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE, nthread = n_threads
|
||||
)
|
||||
expect_equal(dim(dtest4), c(3, 4))
|
||||
expect_equal(getinfo(dtest4, 'label'), c(0, 1, 0))
|
||||
|
||||
# check that feature info is saved
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = agaricus.train$data, label = agaricus.train$label, nthread = n_threads
|
||||
)
|
||||
cnames <- colnames(dtrain)
|
||||
expect_equal(length(cnames), 126)
|
||||
tmp_file <- tempfile('xgb.DMatrix_')
|
||||
@ -129,7 +135,7 @@ test_that("xgb.DMatrix: saving, loading", {
|
||||
})
|
||||
|
||||
test_that("xgb.DMatrix: getinfo & setinfo", {
|
||||
dtest <- xgb.DMatrix(test_data)
|
||||
dtest <- xgb.DMatrix(test_data, nthread = n_threads)
|
||||
expect_true(setinfo(dtest, 'label', test_label))
|
||||
labels <- getinfo(dtest, 'label')
|
||||
expect_equal(test_label, getinfo(dtest, 'label'))
|
||||
@ -156,7 +162,7 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
|
||||
})
|
||||
|
||||
test_that("xgb.DMatrix: slice, dim", {
|
||||
dtest <- xgb.DMatrix(test_data, label = test_label)
|
||||
dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
|
||||
expect_equal(dim(dtest), dim(test_data))
|
||||
dsub1 <- slice(dtest, 1:42)
|
||||
expect_equal(nrow(dsub1), 42)
|
||||
@ -171,16 +177,20 @@ test_that("xgb.DMatrix: slice, trailing empty rows", {
|
||||
data(agaricus.train, package = 'xgboost')
|
||||
train_data <- agaricus.train$data
|
||||
train_label <- agaricus.train$label
|
||||
dtrain <- xgb.DMatrix(data = train_data, label = train_label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = train_data, label = train_label, nthread = n_threads
|
||||
)
|
||||
slice(dtrain, 6513L)
|
||||
train_data[6513, ] <- 0
|
||||
dtrain <- xgb.DMatrix(data = train_data, label = train_label)
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = train_data, label = train_label, nthread = n_threads
|
||||
)
|
||||
slice(dtrain, 6513L)
|
||||
expect_equal(nrow(dtrain), 6513)
|
||||
})
|
||||
|
||||
test_that("xgb.DMatrix: colnames", {
|
||||
dtest <- xgb.DMatrix(test_data, label = test_label)
|
||||
dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
|
||||
expect_equal(colnames(dtest), colnames(test_data))
|
||||
expect_error(colnames(dtest) <- 'asdf')
|
||||
new_names <- make.names(seq_len(ncol(test_data)))
|
||||
@ -196,7 +206,7 @@ test_that("xgb.DMatrix: nrow is correct for a very sparse matrix", {
|
||||
x <- Matrix::rsparsematrix(nr, 100, density = 0.0005)
|
||||
# we want it very sparse, so that last rows are empty
|
||||
expect_lt(max(x@i), nr)
|
||||
dtest <- xgb.DMatrix(x)
|
||||
dtest <- xgb.DMatrix(x, nthread = n_threads)
|
||||
expect_equal(dim(dtest), dim(x))
|
||||
})
|
||||
|
||||
@ -205,8 +215,8 @@ test_that("xgb.DMatrix: print", {
|
||||
|
||||
# core DMatrix with just data and labels
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = agaricus.train$data
|
||||
, label = agaricus.train$label
|
||||
data = agaricus.train$data, label = agaricus.train$label,
|
||||
nthread = n_threads
|
||||
)
|
||||
txt <- capture.output({
|
||||
print(dtrain)
|
||||
@ -222,10 +232,11 @@ test_that("xgb.DMatrix: print", {
|
||||
|
||||
# DMatrix with weights and base_margin
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = agaricus.train$data
|
||||
, label = agaricus.train$label
|
||||
, weight = seq_along(agaricus.train$label)
|
||||
, base_margin = agaricus.train$label
|
||||
data = agaricus.train$data,
|
||||
label = agaricus.train$label,
|
||||
weight = seq_along(agaricus.train$label),
|
||||
base_margin = agaricus.train$label,
|
||||
nthread = n_threads
|
||||
)
|
||||
txt <- capture.output({
|
||||
print(dtrain)
|
||||
@ -234,7 +245,8 @@ test_that("xgb.DMatrix: print", {
|
||||
|
||||
# DMatrix with just features
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = agaricus.train$data
|
||||
data = agaricus.train$data,
|
||||
nthread = n_threads
|
||||
)
|
||||
txt <- capture.output({
|
||||
print(dtrain)
|
||||
@ -245,7 +257,8 @@ test_that("xgb.DMatrix: print", {
|
||||
data_no_colnames <- agaricus.train$data
|
||||
colnames(data_no_colnames) <- NULL
|
||||
dtrain <- xgb.DMatrix(
|
||||
data = data_no_colnames
|
||||
data = data_no_colnames,
|
||||
nthread = n_threads
|
||||
)
|
||||
txt <- capture.output({
|
||||
print(dtrain)
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
context("feature weights")
|
||||
|
||||
n_threads <- 2
|
||||
|
||||
test_that("training with feature weights works", {
|
||||
nrows <- 1000
|
||||
ncols <- 9
|
||||
@ -10,8 +12,12 @@ test_that("training with feature weights works", {
|
||||
|
||||
test <- function(tm) {
|
||||
names <- paste0("f", 1:ncols)
|
||||
xy <- xgb.DMatrix(data = x, label = y, feature_weights = weights)
|
||||
params <- list(colsample_bynode = 0.4, tree_method = tm, nthread = 1)
|
||||
xy <- xgb.DMatrix(
|
||||
data = x, label = y, feature_weights = weights, nthread = n_threads
|
||||
)
|
||||
params <- list(
|
||||
colsample_bynode = 0.4, tree_method = tm, nthread = n_threads
|
||||
)
|
||||
model <- xgb.train(params = params, data = xy, nrounds = 32)
|
||||
importance <- xgb.importance(model = model, feature_names = names)
|
||||
expect_equal(dim(importance), c(ncols, 4))
|
||||
|
||||
@ -1,13 +1,19 @@
context('Test generalized linear models')

n_threads <- 2

test_that("gblinear works", {
  data(agaricus.train, package = 'xgboost')
  data(agaricus.test, package = 'xgboost')
  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
  dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
  dtrain <- xgb.DMatrix(
    agaricus.train$data, label = agaricus.train$label, nthread = n_threads
  )
  dtest <- xgb.DMatrix(
    agaricus.test$data, label = agaricus.test$label, nthread = n_threads
  )

  param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
                nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
                nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
  watchlist <- list(eval = dtest, train = dtrain)

  n <- 5 # iterations
@ -48,12 +54,16 @@ test_that("gblinear works", {
test_that("gblinear early stopping works", {
  data(agaricus.train, package = 'xgboost')
  data(agaricus.test, package = 'xgboost')
  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
  dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
  dtrain <- xgb.DMatrix(
    agaricus.train$data, label = agaricus.train$label, nthread = n_threads
  )
  dtest <- xgb.DMatrix(
    agaricus.test$data, label = agaricus.test$label, nthread = n_threads
  )

  param <- list(
    objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
    nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
    nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
    updater = "coord_descent"
  )

@ -171,6 +171,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
  fit <- xgboost(
    params = c(
      list(
        nthread = 2,
        booster = booster,
        objective = "reg:squarederror",
        eval_metric = "rmse"),
@ -257,7 +258,7 @@ test_that("xgb.Booster serializing as R object works", {
  .skip_if_vcd_not_available()
  saveRDS(bst.Tree, 'xgb.model.rds')
  bst <- readRDS('xgb.model.rds')
  dtrain <- xgb.DMatrix(sparse_matrix, label = label)
  dtrain <- xgb.DMatrix(sparse_matrix, label = label, nthread = 2)
  expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
  expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
  xgb.save(bst, 'xgb.model')
@ -363,7 +364,8 @@ test_that("xgb.importance works with and without feature names", {
    data = as.matrix(data.frame(x = c(0, 1))),
    label = c(1, 2),
    nrounds = 1,
    base_score = 0.5
    base_score = 0.5,
    nthread = 2
  )
  df <- xgb.model.dt.tree(model = m)
  expect_equal(df$Feature, "Leaf")

@ -2,6 +2,8 @@ require(xgboost)

context("interaction constraints")

n_threads <- 2

set.seed(1024)
x1 <- rnorm(1000, 1)
x2 <- rnorm(1000, 1)
@ -45,11 +47,18 @@ test_that("interaction constraints scientific representation", {
  d <- matrix(rexp(rows, rate = .1), nrow = rows, ncol = cols)
  y <- rnorm(rows)

  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
  inc <- list(c(seq.int(from = 0, to = cols, by = 1)))

  with_inc <- xgb.train(data = dtrain, tree_method = 'hist',
                        interaction_constraints = inc, nrounds = 10)
  without_inc <- xgb.train(data = dtrain, tree_method = 'hist', nrounds = 10)
  with_inc <- xgb.train(
    data = dtrain,
    tree_method = 'hist',
    interaction_constraints = inc,
    nrounds = 10,
    nthread = n_threads
  )
  without_inc <- xgb.train(
    data = dtrain, tree_method = 'hist', nrounds = 10, nthread = n_threads
  )
  expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))
})
@ -1,6 +1,7 @@
context('Test prediction of feature interactions')

set.seed(123)
n_threads <- 2

test_that("predict feature interactions works", {
  # simulate some binary data and a linear outcome with an interaction term
@ -19,8 +20,10 @@ test_that("predict feature interactions works", {

  y <- f_int(X)

  dm <- xgb.DMatrix(X, label = y)
  param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
  dm <- xgb.DMatrix(X, label = y, nthread = n_threads)
  param <- list(
    eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = n_threads
  )
  b <- xgb.train(param, dm, 100)

  pred <- predict(b, dm, outputmargin = TRUE)
@ -99,11 +102,13 @@ test_that("SHAP contribution values are not NAN", {
    verbose = 0,
    params = list(
      objective = "reg:squarederror",
      eval_metric = "rmse"),
      eval_metric = "rmse",
      nthread = n_threads
    ),
    data = as.matrix(subset(d, fold == 2)[, ivs]),
    label = subset(d, fold == 2)$y,
    nthread = 1,
    nrounds = 3)
    nrounds = 3
  )

  shaps <- as.data.frame(predict(fit,
    newdata = as.matrix(subset(d, fold == 1)[, ivs]),
@ -116,8 +121,12 @@ test_that("SHAP contribution values are not NAN", {

test_that("multiclass feature interactions work", {
  dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
  param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
  dm <- xgb.DMatrix(
    as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
  )
  param <- list(
    eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3, nthread = n_threads
  )
  b <- xgb.train(param, dm, 40)
  pred <- t(
    array(
@ -166,6 +175,7 @@ test_that("SHAP single sample works", {
    max_depth = 2,
    nrounds = 4,
    objective = "binary:logistic",
    nthread = n_threads
  )

  predt <- predict(

@ -9,7 +9,8 @@ test_that("load/save raw works", {
  nrounds <- 8
  booster <- xgboost(
    data = train$data, label = train$label,
    nrounds = nrounds, objective = "binary:logistic"
    nrounds = nrounds, objective = "binary:logistic",
    nthread = 2
  )

  json_bytes <- xgb.save.raw(booster, raw_format = "json")

@ -66,7 +66,7 @@ test_that("Models from previous versions of XGBoost can be loaded", {
  unzip(zipfile, exdir = extract_dir, overwrite = TRUE)
  model_dir <- file.path(extract_dir, 'models')

  pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4))
  pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4), nthread = 2)

  lapply(list.files(model_dir), function (x) {
    model_file <- file.path(model_dir, x)
@ -87,6 +87,7 @@ test_that("Models from previous versions of XGBoost can be loaded", {
      booster <- readRDS(model_file)
    } else {
      booster <- xgb.load(model_file)
      xgb.parameters(booster) <- list(nthread = 2)
    }
    predict(booster, newdata = pred_data)
    run_booster_check(booster, name)

@ -3,8 +3,12 @@ context('Test model params and call are exposed to R')
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')

dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
dtrain <- xgb.DMatrix(
  agaricus.train$data, label = agaricus.train$label, nthread = 2
)
dtest <- xgb.DMatrix(
  agaricus.test$data, label = agaricus.test$label, nthread = 2
)

bst <- xgboost(data = dtrain,
               max_depth = 2,

@ -4,8 +4,10 @@ set.seed(1994)

test_that("Poisson regression works", {
  data(mtcars)
  bst <- xgboost(data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
                 objective = 'count:poisson', nrounds = 10, verbose = 0)
  bst <- xgboost(
    data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
    objective = 'count:poisson', nrounds = 10, verbose = 0, nthread = 2
  )
  expect_equal(class(bst), "xgb.Booster")
  pred <- predict(bst, as.matrix(mtcars[, -11]))
  expect_equal(length(pred), 32)
@ -1,5 +1,7 @@
context('Learning to rank')

n_threads <- 2

test_that('Test ranking with unweighted data', {
  X <- Matrix::sparseMatrix(
    i = c(2, 3, 7, 9, 12, 15, 17, 18)
@ -9,10 +11,10 @@ test_that('Test ranking with unweighted data', {
  )
  y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
  group <- c(5, 5, 5, 5)
  dtrain <- xgb.DMatrix(X, label = y, group = group)
  dtrain <- xgb.DMatrix(X, label = y, group = group, nthread = n_threads)

  params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
                 eval_metric = 'auc', eval_metric = 'aucpr')
                 eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
  bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
  # Check if the metric is monotone increasing
  expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
@ -29,10 +31,14 @@ test_that('Test ranking with weighted data', {
  y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
  group <- c(5, 5, 5, 5)
  weight <- c(1.0, 2.0, 3.0, 4.0)
  dtrain <- xgb.DMatrix(X, label = y, group = group, weight = weight)
  dtrain <- xgb.DMatrix(
    X, label = y, group = group, weight = weight, nthread = n_threads
  )

  params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
                 eval_metric = 'auc', eval_metric = 'aucpr')
  params <- list(
    eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
    eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
  )
  bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
  # Check if the metric is monotone increasing
  expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))

@ -16,6 +16,7 @@ test_that("Can save and load models with Unicode paths", {
    path <- file.path(tmpdir, x)
    xgb.save(bst, path)
    bst2 <- xgb.load(path)
    xgb.parameters(bst2) <- list(nthread = 2)
    expect_equal(predict(bst, test$data), predict(bst2, test$data))
  })
})
@ -2,8 +2,15 @@ context("update trees in an existing model")

data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

n_threads <- 1

dtrain <- xgb.DMatrix(
  agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
dtest <- xgb.DMatrix(
  agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)

# Disable flaky tests for 32-bit Windows.
# See https://github.com/dmlc/xgboost/issues/3720
@ -14,7 +21,7 @@ test_that("updating the model works", {

  # no-subsampling
  p1 <- list(
    objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2,
    objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = n_threads,
    updater = "grow_colmaker,prune"
  )
  set.seed(11)
@ -86,9 +93,11 @@ test_that("updating the model works", {
})

test_that("updating works for multiclass & multitree", {
  dtr <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
  dtr <- xgb.DMatrix(
    as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
  )
  watchlist <- list(train = dtr)
  p0 <- list(max_depth = 2, eta = 0.5, nthread = 2, subsample = 0.6,
  p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
             objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
             base_score = 0)
  set.seed(121)
@ -31,6 +31,8 @@ require(data.table)
if (!require('vcd')) {
  install.packages('vcd')
}

data.table::setDTthreads(2)
```

> **VCD** package is used for one of its embedded datasets only.

@ -297,23 +299,25 @@ test <- agaricus.test

#Random Forest - 1000 trees
bst <- xgboost(
  data = train$data
  , label = train$label
  , max_depth = 4
  , num_parallel_tree = 1000
  , subsample = 0.5
  , colsample_bytree = 0.5
  , nrounds = 1
  , objective = "binary:logistic"
  data = train$data,
  label = train$label,
  max_depth = 4,
  num_parallel_tree = 1000,
  subsample = 0.5,
  colsample_bytree = 0.5,
  nrounds = 1,
  objective = "binary:logistic",
  nthread = 2
)

#Boosting - 3 rounds
bst <- xgboost(
  data = train$data
  , label = train$label
  , max_depth = 4
  , nrounds = 3
  , objective = "binary:logistic"
  data = train$data,
  label = train$label,
  max_depth = 4,
  nrounds = 3,
  objective = "binary:logistic",
  nthread = 2
)
```
@ -86,9 +86,10 @@ data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
               nrounds = 2, objective = "binary:logistic")
               nrounds = 2, objective = "binary:logistic", nthread = 2)
xgb.save(bst, 'model.save')
bst = xgb.load('model.save')
xgb.parameters(bst) <- list(nthread = 2)
pred <- predict(bst, test$data)
@

@ -127,7 +128,7 @@ training from initial prediction value, weighted training instance.

We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
<<xgb.DMatrix>>=
dtrain <- xgb.DMatrix(train$data, label = train$label)
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)
class(dtrain)
head(getinfo(dtrain,'label'))
@

@ -161,9 +162,9 @@ evalerror <- function(preds, dtrain) {
  return(list(metric = "MSE", value = err))
}

dtest <- xgb.DMatrix(test$data, label = test$label)
dtest <- xgb.DMatrix(test$data, label = test$label, nthread = 2)
watchlist <- list(eval = dtest, train = dtrain)
param <- list(max_depth = 2, eta = 1)
param <- list(max_depth = 2, eta = 1, nthread = 2)

bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
@

@ -173,13 +173,13 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**

```{r trainingDense, message=F, warning=F}
bstDense <- xgboost(
  data = as.matrix(train$data)
  , label = train$label
  , max_depth = 2
  , eta = 1
  , nthread = 2
  , nrounds = 2
  , objective = "binary:logistic"
  data = as.matrix(train$data),
  label = train$label,
  max_depth = 2,
  eta = 1,
  nthread = 2,
  nrounds = 2,
  objective = "binary:logistic"
)
```

@ -188,14 +188,14 @@ bstDense <- xgboost(
**XGBoost** offers a way to group them in a `xgb.DMatrix`. You can even add other metadata in it. It will be useful for the most advanced features we will discover later.

```{r trainingDmatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
bstDMatrix <- xgboost(
  data = dtrain
  , max_depth = 2
  , eta = 1
  , nthread = 2
  , nrounds = 2
  , objective = "binary:logistic"
  data = dtrain,
  max_depth = 2,
  eta = 1,
  nthread = 2,
  nrounds = 2,
  objective = "binary:logistic"
)
```

@ -314,8 +314,8 @@ Most of the features below have been implemented to help you to improve your mod
For the following advanced features, we need to put data in `xgb.DMatrix` as explained above.

```{r DMatrix, message=F, warning=F}
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtest <- xgb.DMatrix(data = test$data, label = test$label)
dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
dtest <- xgb.DMatrix(data = test$data, label = test$label, nthread = 2)
```

### Measure learning progress with xgb.train

@ -476,6 +476,7 @@ An interesting test to see how identical our saved model is to the original one
```{r loadModel, message=F, warning=F}
# load binary model to R
bst2 <- xgb.load("xgboost.model")
xgb.parameters(bst2) <- list(nthread = 2)
pred2 <- predict(bst2, test$data)

# And now the test
@ -500,6 +501,7 @@ print(class(rawVec))

# load binary model to R
bst3 <- xgb.load(rawVec)
xgb.parameters(bst3) <- list(nthread = 2)
pred3 <- predict(bst3, test$data)

# pred2 should be identical to pred

@ -175,7 +175,7 @@ bst_preds == bst_from_json_preds

None are exactly equal again. What is going on here? Well, since we are using the value `1` in the calculations, we have introduced a double into the calculation. Because of this, all float values are promoted to 64-bit doubles and the 64-bit version of the exponentiation operator `exp` is also used. On the other hand, xgboost uses the 32-bit version of the exponentiation operator in its [sigmoid function](https://github.com/dmlc/xgboost/blob/54980b8959680a0da06a3fc0ec776e47c8cbb0a1/src/common/math.h#L25-L27).

How do we fix this? We have to ensure we use the correct data types everywhere and the correct operators. If we use only floats, the float library that we have loaded will ensure the 32-bit float exponentiation operator is applied.
```{r}
# calculate the predictions casting doubles to floats
bst_from_json_preds <- ifelse(
@ -80,6 +80,24 @@ R package versioning
====================
See :ref:`release`.

Testing R package with different compilers
==========================================

You can change the default compiler of R by changing the configuration file in the home
directory. For instance, if you want to test XGBoost built with clang++ instead of g++ on
Linux, put the following in your ``~/.R/Makevars`` file:

.. code-block:: sh

    CC=clang-15
    CXX17=clang++-15

Be aware that the variable name should match the name used by ``R CMD``:

.. code-block:: sh

    R CMD config CXX17

Registering native routines in R
================================
According to `R extension manual <https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Registering-native-routines>`_,
@ -50,6 +50,7 @@ inline void EllpackPageSource::Fetch() {
  // silence the warning about unused variables.
  (void)(row_stride_);
  (void)(is_dense_);
  (void)(device_);
  common::AssertGPUSupport();
}
#endif  // !defined(XGBOOST_USE_CUDA)

@ -106,14 +106,30 @@ void GBTreeModel::Load(dmlc::Stream* fi) {
  Validate(*this);
}

namespace {
std::int32_t IOThreads(Context const* ctx) {
  CHECK(ctx);
  std::int32_t n_threads = ctx->Threads();
  // CRAN checks the number of threads used by examples, but we might not have the right
  // number of threads when serializing/unserializing models, as nthread is a booster
  // parameter that only takes effect after booster initialization.
  //
  // The threshold ratio of CPU time to elapsed time for R is 2.5, so we cap the number
  // of threads at 2.
#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
  n_threads = std::min(2, n_threads);
#endif
  return n_threads;
}
}  // namespace
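The helper above is the C++ half of the change: under `XGBOOST_STRICT_R_MODE`, model (de)serialization clamps its parallelism to two threads, because `nthread` is a booster parameter and the right value is not yet known while a model is being loaded. The R tests compensate by pinning the parameter right after loading; a short sketch of that idiom (the file name is a placeholder):

```r
library(xgboost)

# Load a previously saved booster; the file name here is hypothetical.
bst <- xgb.load("xgboost.model")
# nthread only takes effect once the booster is configured, so the updated
# tests set it explicitly right after loading.
xgb.parameters(bst) <- list(nthread = 2)
```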
void GBTreeModel::SaveModel(Json* p_out) const {
  auto& out = *p_out;
  CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
  out["gbtree_model_param"] = ToJson(param);
  std::vector<Json> trees_json(trees.size());

  CHECK(ctx_);
  common::ParallelFor(trees.size(), ctx_->Threads(), [&](auto t) {
  common::ParallelFor(trees.size(), IOThreads(ctx_), [&](auto t) {
    auto const& tree = trees[t];
    Json jtree{Object{}};
    tree->SaveModel(&jtree);
@ -151,9 +167,7 @@ void GBTreeModel::LoadModel(Json const& in) {
  CHECK_EQ(tree_info_json.size(), param.num_trees);
  tree_info.resize(param.num_trees);

  CHECK(ctx_);

  common::ParallelFor(param.num_trees, ctx_->Threads(), [&](auto t) {
  common::ParallelFor(param.num_trees, IOThreads(ctx_), [&](auto t) {
    auto tree_id = get<Integer const>(trees_json[t]["id"]);
    trees.at(tree_id).reset(new RegTree{});
    trees[tree_id]->LoadModel(trees_json[t]);

@ -3,9 +3,15 @@ import argparse
import os
import shutil
import subprocess
from io import StringIO
from pathlib import Path
from platform import system

try:
    import pandas as pd
except ImportError:
    pd = None

from test_utils import R_PACKAGE, ROOT, DirectoryExcursion, cd, print_time, record_time


@ -97,16 +103,47 @@ def build_rpackage(path: str) -> str:
    return tarball


def check_example_timing(rcheck_dir: Path, threshold: float) -> None:
    with open(rcheck_dir / "xgboost-Ex.timings", "r") as fd:
        timings = fd.readlines()
    newlines = []
    for line in timings:
        line = line.strip()
        newlines.append(line)
    con_timings = "\n".join(newlines)
    df = pd.read_csv(StringIO(con_timings), delimiter="\t")
    ratio_n = "user/elapsed"
    df[ratio_n] = df["user"] / df["elapsed"]
    offending = df[df[ratio_n] > threshold]

    try:
        # requires the tabulate package
        df.to_markdown("timings.md")
        offending.to_markdown("offending.md")
    except ImportError:
        print("failed to export markdown files.")
        pass

    if offending.shape[0] == 0:
        return

    print(offending)
    raise ValueError("There are examples using too many threads")
@cd(ROOT)
@record_time
def check_rpackage(path: str) -> None:
    env = os.environ.copy()
    print("Ncpus:", f"{os.cpu_count()}")
    threshold = 2.5
    env.update(
        {
            "MAKEFLAGS": f"-j{os.cpu_count()}",
            # CRAN-specific environment variables
            "_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(2.5),
            "_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
            "_R_CHECK_TEST_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
            "_R_CHECK_VIGNETTE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
        }
    )

@ -118,11 +155,14 @@ def check_rpackage(path: str) -> None:
        CC = os.path.join(mingw_bin, "gcc.exe")
        env.update({"CC": CC, "CXX": CXX})

    status = subprocess.run([R, "CMD", "check", "--as-cran", path], env=env)
    with open(Path("xgboost.Rcheck") / "00check.log", "r") as fd:
    status = subprocess.run(
        [R, "CMD", "check", "--as-cran", "--timings", path], env=env
    )
    rcheck_dir = Path("xgboost.Rcheck")
    with open(rcheck_dir / "00check.log", "r") as fd:
        check_log = fd.read()

    with open(Path("xgboost.Rcheck") / "00install.out", "r") as fd:
    with open(rcheck_dir / "00install.out", "r") as fd:
        install_log = fd.read()

    msg = f"""
@ -144,6 +184,8 @@ def check_rpackage(path: str) -> None:
    if check_log.find("Examples with CPU time") != -1:
        print(msg)
        raise ValueError("Suspicious NOTE.")
    if pd is not None:
        check_example_timing(rcheck_dir, threshold)


@cd(R_PACKAGE)
@ -264,6 +306,8 @@ def main(args: argparse.Namespace) -> None:
            test_with_autotools()
        else:
            test_with_cmake(args)
    elif args.task == "timings":
        check_example_timing(Path("xgboost.Rcheck"), 2.5)
    else:
        raise ValueError("Unexpected task.")

@ -279,7 +323,7 @@ if __name__ == "__main__":
    parser.add_argument(
        "--task",
        type=str,
        choices=["pack", "build", "check", "doc"],
        choices=["pack", "build", "check", "doc", "timings"],
        default="check",
        required=False,
    )