[R] Set number of threads in demos and tests. (#9591)

- Restrict the number of threads in IO.
- Specify the number of threads in demos and tests.
- Add helper scripts for checks.
parent def77870f3
commit cac2cd2e94
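Every example touched below follows the same pattern: pin the thread count in one variable, hand it to data.table, and pass it explicitly to each xgb.DMatrix/xgboost call instead of a hard-coded `nthread = 2`. A minimal sketch of that pattern, assembled from the hunks below rather than quoted from any single one:

    data(agaricus.train, package = "xgboost")
    ## Keep the number of threads to 1 for examples
    nthread <- 1
    data.table::setDTthreads(nthread)
    dtrain <- with(
      agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
    )
    bst <- xgboost(
      data = agaricus.train$data, label = agaricus.train$label,
      max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
      objective = "binary:logistic"
    )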
@@ -4,3 +4,5 @@
 ^.*\.Rproj$
 ^\.Rproj\.user$
 README.md
+^doc$
+^Meta$
@@ -557,14 +557,18 @@ cb.cv.predict <- function(save_models = FALSE) {
 #'
 #' @examples
 #' #### Binary classification:
-#' #
+#'
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
 #' # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
 #' # without considering the 2nd order interactions:
 #' x <- model.matrix(Species ~ .^2, iris)[,-1]
 #' colnames(x)
-#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
+#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
 #' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
-#'               lambda = 0.0003, alpha = 0.0003, nthread = 2)
+#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 #' # For 'shotgun', which is a default linear updater, using high eta values may result in
 #' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
 #' # rate does not break the convergence, but allows us to illustrate the typical pattern of
@@ -594,9 +598,9 @@ cb.cv.predict <- function(save_models = FALSE) {
 #'
 #' #### Multiclass classification:
 #' #
-#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
+#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
 #' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
-#'               lambda = 0.0003, alpha = 0.0003, nthread = 1)
+#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 #' # For the default linear updater 'shotgun' it sometimes is helpful
 #' # to use smaller eta to reduce instability
 #' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
@@ -267,11 +267,16 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #'
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
+#'
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
 #'
 #' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
+#'                eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
 #' # use all trees by default
 #' pred <- predict(bst, test$data)
 #' # use only the 1st tree
@@ -337,8 +342,14 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
                                 reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
   object <- xgb.Booster.complete(object, saveraw = FALSE)
 
-  if (!inherits(newdata, "xgb.DMatrix"))
-    newdata <- xgb.DMatrix(newdata, missing = missing, nthread = NVL(object$params[["nthread"]], -1))
+  if (!inherits(newdata, "xgb.DMatrix")) {
+    config <- jsonlite::fromJSON(xgb.config(object))
+    nthread <- strtoi(config$learner$generic_param$nthread)
+    newdata <- xgb.DMatrix(
+      newdata,
+      missing = missing, nthread = NVL(nthread, -1)
+    )
+  }
   if (!is.null(object[["feature_names"]]) &&
       !is.null(colnames(newdata)) &&
       !identical(object[["feature_names"]], colnames(newdata)))
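A note on the predict() hunk above: rather than trusting `object$params[["nthread"]]`, the thread count is now read back from the booster's serialized JSON configuration before a DMatrix is built on the fly. A small sketch of that lookup, assuming a trained booster `bst` (the `NVL(nthread, -1)` fallback lets XGBoost pick a default when the field is absent):

    config <- jsonlite::fromJSON(xgb.config(bst))
    strtoi(config$learner$generic_param$nthread)  # stored as a character field, e.g. "1" -> 1L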
@@ -628,10 +639,15 @@ xgb.attributes <- function(object) {
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
 #' train <- agaricus.train
 #'
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data, label = train$label, max_depth = 2,
+#'   eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+#' )
 #' config <- xgb.config(bst)
 #'
 #' @rdname xgb.config
@@ -18,7 +18,12 @@
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#' dtrain <- with(
+#'   agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+#' )
 #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 #' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 #' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@@ -22,14 +22,23 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
+#'
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data, label = train$label, max_depth = 2, eta = 1,
+#'   nthread = nthread,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
+#' )
+#'
 #' xgb.save(bst, 'xgb.model')
 #' bst <- xgb.load('xgb.model')
 #' if (file.exists('xgb.model')) file.remove('xgb.model')
-#' pred <- predict(bst, test$data)
 #' @export
 xgb.load <- function(modelfile) {
   if (is.null(modelfile))
@@ -46,9 +46,12 @@
 #' # Basic use:
 #'
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
 #'
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#'                eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
 #'
 #' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
 #'
@@ -45,10 +45,13 @@
 #' @examples
 #'
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
 #'
-#' # Change max_depth to a higher number to get a more significant result
+#' ## Change max_depth to a higher number to get a more significant result
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
-#'                eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
+#'                eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
 #'                subsample = 0.5, min_child_weight = 2)
 #'
 #' xgb.plot.deepness(bst)
@@ -45,9 +45,14 @@
 #'
 #' @examples
 #' data(agaricus.train)
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
 #'
-#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
-#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
+#'   eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+#' )
 #'
 #' importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
 #'
@@ -43,10 +43,15 @@
 #' @examples
 #'
 #' data(agaricus.train, package='xgboost')
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
 #'
-#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
-#'                eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
-#'                min_child_weight = 50, verbose = 0)
+#' bst <- xgboost(
+#'   data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
+#'   eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
+#'   min_child_weight = 50, verbose = 0
+#' )
 #'
 #' p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
 #' print(p)
@@ -74,9 +74,14 @@
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
-#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#' nrounds <- 20
+#'
+#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
 #'                eta = 0.1, max_depth = 3, subsample = .5,
-#'                method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
+#'                method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
 #'
 #' xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
 #' contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@@ -85,12 +90,11 @@
 #'
 #' # multiclass example - plots for each class separately:
 #' nclass <- 3
-#' nrounds <- 20
 #' x <- as.matrix(iris[, -5])
 #' set.seed(123)
 #' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
 #' mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
-#'                 max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
+#'                 max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
 #'                 objective = "multi:softprob", num_class = nclass, verbose = 0)
 #' trees0 <- seq(from=0, by=nclass, length.out=nrounds)
 #' col <- rgb(0, 0, 1, 0.5)
@@ -25,14 +25,22 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
+#'
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#' bst <- xgboost(
+#'   data = train$data, label = train$label, max_depth = 2, eta = 1,
+#'   nthread = nthread,
+#'   nrounds = 2,
+#'   objective = "binary:logistic"
+#' )
 #' xgb.save(bst, 'xgb.model')
 #' bst <- xgb.load('xgb.model')
 #' if (file.exists('xgb.model')) file.remove('xgb.model')
-#' pred <- predict(bst, test$data)
 #' @export
 xgb.save <- function(model, fname) {
   if (typeof(fname) != "character")
@@ -16,13 +16,18 @@
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
+#'
+#' ## Keep the number of threads to 2 for examples
+#' nthread <- 2
+#' data.table::setDTthreads(nthread)
+#'
 #' train <- agaricus.train
 #' test <- agaricus.test
 #' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-#'                eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+#'                eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
+#'
 #' raw <- xgb.save.raw(bst)
 #' bst <- xgb.load.raw(raw)
-#' pred <- predict(bst, test$data)
 #'
 #' @export
 xgb.save.raw <- function(model, raw_format = "deprecated") {
@@ -168,7 +168,8 @@
 #' than the \code{xgboost} interface.
 #'
 #' Parallelization is automatically enabled if \code{OpenMP} is present.
-#' Number of threads can also be manually specified via \code{nthread} parameter.
+#' Number of threads can also be manually specified via the \code{nthread}
+#' parameter.
 #'
 #' The evaluation metric is chosen automatically by XGBoost (according to the objective)
 #' when the \code{eval_metric} parameter is not provided.
@@ -237,17 +238,25 @@
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
 #'
-#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
+#' dtrain <- with(
+#'   agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+#' )
+#' dtest <- with(
+#'   agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
+#' )
 #' watchlist <- list(train = dtrain, eval = dtest)
 #'
 #' ## A simple xgb.train example:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
 #'               objective = "binary:logistic", eval_metric = "auc")
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 #'
-#'
-#' ## An xgb.train example where custom objective and evaluation metric are used:
+#' ## An xgb.train example where custom objective and evaluation metric are
+#' ## used:
 #' logregobj <- function(preds, dtrain) {
 #'   labels <- getinfo(dtrain, "label")
 #'   preds <- 1/(1 + exp(-preds))
@@ -263,12 +272,12 @@
 #'
 #' # These functions could be used by passing them either:
 #' # as 'objective' and 'eval_metric' parameters in the params list:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
 #'               objective = logregobj, eval_metric = evalerror)
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 #'
 #' # or through the ... arguments:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
 #'                  objective = logregobj, eval_metric = evalerror)
 #'
@@ -278,7 +287,7 @@
 #'
 #'
 #' ## An xgb.train example of using variable learning rates at each iteration:
-#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+#' param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
 #'               objective = "binary:logistic", eval_metric = "auc")
 #' my_etas <- list(eta = c(0.5, 0.1))
 #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@@ -290,7 +299,7 @@
 #'
 #' ## An 'xgboost' interface example:
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
-#'                max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
+#'                max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
 #'                objective = "binary:logistic")
 #' pred <- predict(bst, agaricus.test$data)
 #'
@@ -35,14 +35,18 @@ Callback function expects the following values to be set in its calling frame:
 }
 \examples{
 #### Binary classification:
-#
+
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
 # without considering the 2nd order interactions:
 x <- model.matrix(Species ~ .^2, iris)[,-1]
 colnames(x)
-dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2)
+dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
 param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
-              lambda = 0.0003, alpha = 0.0003, nthread = 2)
+              lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 # For 'shotgun', which is a default linear updater, using high eta values may result in
 # unstable behaviour in some datasets. With this simple dataset, however, the high learning
 # rate does not break the convergence, but allows us to illustrate the typical pattern of
@@ -72,9 +76,9 @@ matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
 
 #### Multiclass classification:
 #
-dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 1)
+dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
 param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
-              lambda = 0.0003, alpha = 0.0003, nthread = 1)
+              lambda = 0.0003, alpha = 0.0003, nthread = nthread)
 # For the default linear updater 'shotgun' it sometimes is helpful
 # to use smaller eta to reduce instability
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
@@ -132,11 +132,16 @@ Note also that converting a matrix to \code{\link{xgb.DMatrix}} uses multiple th
 
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
+
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
 
 bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
+               eta = 0.5, nthread = nthread, nrounds = 5, objective = "binary:logistic")
 # use all trees by default
 pred <- predict(bst, test$data)
 # use only the 1st tree
@@ -38,7 +38,12 @@ Supported input file formats are either a LIBSVM text file or a binary file that
 }
 \examples{
 data(agaricus.train, package='xgboost')
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+dtrain <- with(
+  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+)
 xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
 dtrain <- xgb.DMatrix('xgb.DMatrix.data')
 if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
@@ -19,10 +19,15 @@ Accessors for model parameters as JSON string.
 }
 \examples{
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
 train <- agaricus.train
 
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2,
+  eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+)
 config <- xgb.config(bst)
 
 }
@@ -27,14 +27,23 @@ not \code{xgb.load}.
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
+
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2, eta = 1,
+  nthread = nthread,
+  nrounds = 2,
+  objective = "binary:logistic"
+)
+
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 if (file.exists('xgb.model')) file.remove('xgb.model')
-pred <- predict(bst, test$data)
 }
 \seealso{
 \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
@@ -66,9 +66,12 @@ Parse a boosted tree model text dump into a \code{data.table} structure.
 # Basic use:
 
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
 
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+               eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
 
 (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
 
@@ -61,10 +61,13 @@ This function was inspired by the blog post
 \examples{
 
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
 
-# Change max_depth to a higher number to get a more significant result
+## Change max_depth to a higher number to get a more significant result
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 6,
-               eta = 0.1, nthread = 2, nrounds = 50, objective = "binary:logistic",
+               eta = 0.1, nthread = nthread, nrounds = 50, objective = "binary:logistic",
                subsample = 0.5, min_child_weight = 2)
 
 xgb.plot.deepness(bst)
@@ -77,9 +77,14 @@ with bar colors corresponding to different clusters that have somewhat similar i
 }
 \examples{
 data(agaricus.train)
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
 
-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
-               eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+bst <- xgboost(
+  data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
+  eta = 1, nthread = nthread, nrounds = 2, objective = "binary:logistic"
+)
 
 importance_matrix <- xgb.importance(colnames(agaricus.train$data), model = bst)
 
@@ -63,10 +63,15 @@ This function is inspired by this blog post:
 \examples{
 
 data(agaricus.train, package='xgboost')
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
 
-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
-               eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
-               min_child_weight = 50, verbose = 0)
+bst <- xgboost(
+  data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
+  eta = 1, nthread = nthread, nrounds = 30, objective = "binary:logistic",
+  min_child_weight = 50, verbose = 0
+)
 
 p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
 print(p)
@@ -124,9 +124,14 @@ a meaningful thing to do.
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
-bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+nrounds <- 20
+
+bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = nrounds,
                eta = 0.1, max_depth = 3, subsample = .5,
-               method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
+               method = "hist", objective = "binary:logistic", nthread = nthread, verbose = 0)
 
 xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
 contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
@@ -135,12 +140,11 @@ xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12) # S
 
 # multiclass example - plots for each class separately:
 nclass <- 3
-nrounds <- 20
 x <- as.matrix(iris[, -5])
 set.seed(123)
 is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
 mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
-                max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
+                max_depth = 2, eta = 0.3, subsample = .5, nthread = nthread,
                 objective = "multi:softprob", num_class = nclass, verbose = 0)
 trees0 <- seq(from=0, by=nclass, length.out=nrounds)
 col <- rgb(0, 0, 1, 0.5)
@@ -31,14 +31,22 @@ releases of XGBoost.
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
+
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+bst <- xgboost(
+  data = train$data, label = train$label, max_depth = 2, eta = 1,
+  nthread = nthread,
+  nrounds = 2,
+  objective = "binary:logistic"
+)
 xgb.save(bst, 'xgb.model')
 bst <- xgb.load('xgb.model')
 if (file.exists('xgb.model')) file.remove('xgb.model')
-pred <- predict(bst, test$data)
 }
 \seealso{
 \code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.
@@ -25,12 +25,17 @@ Save xgboost model from xgboost or xgb.train
 \examples{
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
+
+## Keep the number of threads to 2 for examples
+nthread <- 2
+data.table::setDTthreads(nthread)
+
 train <- agaricus.train
 test <- agaricus.test
 bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-               eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
+               eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
+
 raw <- xgb.save.raw(bst)
 bst <- xgb.load.raw(raw)
-pred <- predict(bst, test$data)
 
 }
@@ -250,7 +250,8 @@ customized objective and evaluation metric functions, therefore it is more flexi
 than the \code{xgboost} interface.
 
 Parallelization is automatically enabled if \code{OpenMP} is present.
-Number of threads can also be manually specified via \code{nthread} parameter.
+Number of threads can also be manually specified via the \code{nthread}
+parameter.
 
 The evaluation metric is chosen automatically by XGBoost (according to the objective)
 when the \code{eval_metric} parameter is not provided.
@@ -286,17 +287,25 @@ The following callbacks are automatically created when certain parameters are se
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 
-dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
-dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
+## Keep the number of threads to 1 for examples
+nthread <- 1
+data.table::setDTthreads(nthread)
+
+dtrain <- with(
+  agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
+)
+dtest <- with(
+  agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
+)
 watchlist <- list(train = dtrain, eval = dtest)
 
 ## A simple xgb.train example:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
               objective = "binary:logistic", eval_metric = "auc")
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 
-
-## An xgb.train example where custom objective and evaluation metric are used:
+## An xgb.train example where custom objective and evaluation metric are
+## used:
 logregobj <- function(preds, dtrain) {
   labels <- getinfo(dtrain, "label")
   preds <- 1/(1 + exp(-preds))
@@ -312,12 +321,12 @@ evalerror <- function(preds, dtrain) {
 
 # These functions could be used by passing them either:
 # as 'objective' and 'eval_metric' parameters in the params list:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
               objective = logregobj, eval_metric = evalerror)
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
 
 # or through the ... arguments:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2)
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread)
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
                  objective = logregobj, eval_metric = evalerror)
 
@@ -327,7 +336,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
 
 
 ## An xgb.train example of using variable learning rates at each iteration:
-param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbose = 0, nthread = nthread,
               objective = "binary:logistic", eval_metric = "auc")
 my_etas <- list(eta = c(0.5, 0.1))
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@@ -339,7 +348,7 @@ bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
 
 ## An 'xgboost' interface example:
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
-               max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
+               max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
                objective = "binary:logistic")
 pred <- predict(bst, agaricus.test$data)
 
R-package/tests/helper_scripts/run-examples.R (new file)
@@ -0,0 +1,25 @@
+## Helper script for running individual examples.
+library(pkgload)
+library(xgboost)
+
+files <- list.files("./man")
+
+
+run_example_timeit <- function(f) {
+  path <- paste("./man/", f, sep = "")
+  print(paste("Test", f))
+  flush.console()
+  t0 <- proc.time()
+  run_example(path)
+  t1 <- proc.time()
+  list(file = f, time = t1 - t0)
+}
+
+timings <- lapply(files, run_example_timeit)
+
+for (t in timings) {
+  ratio <- t$time[1] / t$time[3]
+  if (!is.na(ratio) && !is.infinite(ratio) && ratio >= 2.5) {
+    print(paste("Offending example:", t$file, ratio))
+  }
+}
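The helper script above assumes it is run with the R-package directory as the working directory, so that `./man` resolves; `pkgload::run_example()` executes each Rd file's example, and the loop flags examples whose user-to-elapsed time ratio (`t$time[1] / t$time[3]`, i.e. CPU time over wall time) reaches 2.5 — a sign that an example still runs multi-threaded despite the pinned thread count. A hypothetical invocation (working directory assumed):

    ## from the R-package/ directory:
    source("tests/helper_scripts/run-examples.R")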
@@ -1,7 +1,7 @@
 context("basic functions")
 
-data(agaricus.train, package = 'xgboost')
-data(agaricus.test, package = 'xgboost')
+data(agaricus.train, package = "xgboost")
+data(agaricus.test, package = "xgboost")
 train <- agaricus.train
 test <- agaricus.test
 set.seed(1994)
@@ -9,15 +9,20 @@ set.seed(1994)
 # disable some tests for Win32
 windows_flag <- .Platform$OS.type == "windows" &&
   .Machine$sizeof.pointer != 8
-solaris_flag <- (Sys.info()['sysname'] == "SunOS")
+solaris_flag <- (Sys.info()["sysname"] == "SunOS")
+n_threads <- 1
+
 
 test_that("train and predict binary classification", {
   nrounds <- 2
   expect_output(
-    bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-                   eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic",
-                   eval_metric = "error")
-    , "train-error")
+    bst <- xgboost(
+      data = train$data, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = nrounds,
+      objective = "binary:logistic", eval_metric = "error"
+    ),
+    "train-error"
+  )
   expect_equal(class(bst), "xgb.Booster")
   expect_equal(bst$niter, nrounds)
   expect_false(is.null(bst$evaluation_log))
@@ -46,26 +51,39 @@ test_that("parameter validation works", {
   d <- cbind(
     x1 = rnorm(10),
     x2 = rnorm(10),
-    x3 = rnorm(10))
+    x3 = rnorm(10)
+  )
   y <- d[, "x1"] + d[, "x2"]^2 +
     ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
     rnorm(10)
-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
 
   correct <- function() {
-    params <- list(max_depth = 2, booster = "dart",
-                   rate_drop = 0.5, one_drop = TRUE,
-                   objective = "reg:squarederror")
+    params <- list(
+      max_depth = 2,
+      booster = "dart",
+      rate_drop = 0.5,
+      one_drop = TRUE,
+      nthread = n_threads,
+      objective = "reg:squarederror"
+    )
     xgb.train(params = params, data = dtrain, nrounds = nrounds)
   }
   expect_silent(correct())
   incorrect <- function() {
-    params <- list(max_depth = 2, booster = "dart",
-                   rate_drop = 0.5, one_drop = TRUE,
+    params <- list(
+      max_depth = 2,
+      booster = "dart",
+      rate_drop = 0.5,
+      one_drop = TRUE,
       objective = "reg:squarederror",
-                   foo = "bar", bar = "foo")
+      nthread = n_threads,
+      foo = "bar",
+      bar = "foo"
+    )
     output <- capture.output(
-      xgb.train(params = params, data = dtrain, nrounds = nrounds))
+      xgb.train(params = params, data = dtrain, nrounds = nrounds)
+    )
    print(output)
   }
   expect_output(incorrect(), '\\\\"bar\\\\", \\\\"foo\\\\"')
@@ -79,7 +97,8 @@ test_that("dart prediction works", {
   d <- cbind(
     x1 = rnorm(100),
     x2 = rnorm(100),
-    x3 = rnorm(100))
+    x3 = rnorm(100)
+  )
   y <- d[, "x1"] + d[, "x2"]^2 +
     ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
     rnorm(100)
@@ -93,7 +112,7 @@ test_that("dart prediction works", {
     rate_drop = 0.5,
     one_drop = TRUE,
     eta = 1,
-    nthread = 2,
+    nthread = n_threads,
     nrounds = nrounds,
     objective = "reg:squarederror"
   )
@@ -105,7 +124,7 @@ test_that("dart prediction works", {
   expect_false(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))
 
   set.seed(1994)
-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
   booster_by_train <- xgb.train(
     params = list(
       booster = "dart",
@@ -113,7 +132,7 @@ test_that("dart prediction works", {
       eta = 1,
       rate_drop = 0.5,
       one_drop = TRUE,
-      nthread = 1,
+      nthread = n_threads,
       objective = "reg:squarederror"
     ),
     data = dtrain,
@@ -132,10 +151,13 @@ test_that("train and predict softprob", {
   lb <- as.numeric(iris$Species) - 1
   set.seed(11)
   expect_output(
-    bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-                   max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
-                   objective = "multi:softprob", num_class = 3, eval_metric = "merror")
-    , "train-merror")
+    bst <- xgboost(
+      data = as.matrix(iris[, -5]), label = lb,
+      max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
+      objective = "multi:softprob", num_class = 3, eval_metric = "merror"
+    ),
+    "train-merror"
+  )
   expect_false(is.null(bst$evaluation_log))
   expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
   expect_equal(bst$niter * 3, xgb.ntree(bst))
@@ -164,9 +186,10 @@ test_that("train and predict softprob", {
     x3 = rnorm(100)
   )
   y <- sample.int(10, 100, replace = TRUE) - 1
-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
   booster <- xgb.train(
-    params = list(tree_method = "hist"), data = dtrain, nrounds = 4, num_class = 10,
+    params = list(tree_method = "hist", nthread = n_threads),
+    data = dtrain, nrounds = 4, num_class = 10,
     objective = "multi:softprob"
   )
   predt <- predict(booster, as.matrix(d), reshape = TRUE, strict_shape = FALSE)
@@ -178,10 +201,13 @@ test_that("train and predict softmax", {
   lb <- as.numeric(iris$Species) - 1
   set.seed(11)
   expect_output(
-    bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-                   max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
-                   objective = "multi:softmax", num_class = 3, eval_metric = "merror")
-    , "train-merror")
+    bst <- xgboost(
+      data = as.matrix(iris[, -5]), label = lb,
+      max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
+      objective = "multi:softmax", num_class = 3, eval_metric = "merror"
+    ),
+    "train-merror"
+  )
   expect_false(is.null(bst$evaluation_log))
   expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
   expect_equal(bst$niter * 3, xgb.ntree(bst))
@@ -196,16 +222,19 @@ test_that("train and predict RF", {
   set.seed(11)
   lb <- train$label
   # single iteration
-  bst <- xgboost(data = train$data, label = lb, max_depth = 5,
-                 nthread = 2, nrounds = 1, objective = "binary:logistic", eval_metric = "error",
-                 num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
+  bst <- xgboost(
+    data = train$data, label = lb, max_depth = 5,
+    nthread = n_threads,
+    nrounds = 1, objective = "binary:logistic", eval_metric = "error",
+    num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1
+  )
   expect_equal(bst$niter, 1)
   expect_equal(xgb.ntree(bst), 20)
 
   pred <- predict(bst, train$data)
   pred_err <- sum((pred > 0.5) != lb) / length(lb)
   expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
-  #expect_lt(pred_err, 0.03)
+  # expect_lt(pred_err, 0.03)
 
   pred <- predict(bst, train$data, ntreelimit = 20)
   pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
@@ -219,11 +248,13 @@ test_that("train and predict RF with softprob", {
   lb <- as.numeric(iris$Species) - 1
   nrounds <- 15
   set.seed(11)
-  bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
-                 max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds,
+  bst <- xgboost(
+    data = as.matrix(iris[, -5]), label = lb,
+    max_depth = 3, eta = 0.9, nthread = n_threads, nrounds = nrounds,
     objective = "multi:softprob", eval_metric = "merror",
     num_class = 3, verbose = 0,
-    num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5)
+    num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5
+  )
   expect_equal(bst$niter, 15)
   expect_equal(xgb.ntree(bst), 15 * 3 * 4)
   # predict for all iterations:
@@ -240,18 +271,24 @@ test_that("train and predict RF with softprob", {
 
 test_that("use of multiple eval metrics works", {
   expect_output(
-    bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-                   eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
-                   eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
-    , "train-error.*train-auc.*train-logloss")
+    bst <- xgboost(
+      data = train$data, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+      eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
+    ),
+    "train-error.*train-auc.*train-logloss"
+  )
   expect_false(is.null(bst$evaluation_log))
   expect_equal(dim(bst$evaluation_log), c(2, 4))
   expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
   expect_output(
-    bst2 <- xgboost(data = train$data, label = train$label, max_depth = 2,
-                    eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
-                    eval_metric = list("error", "auc", "logloss"))
-    , "train-error.*train-auc.*train-logloss")
+    bst2 <- xgboost(
+      data = train$data, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+      eval_metric = list("error", "auc", "logloss")
+    ),
+    "train-error.*train-auc.*train-logloss"
+  )
   expect_false(is.null(bst2$evaluation_log))
   expect_equal(dim(bst2$evaluation_log), c(2, 4))
   expect_equal(colnames(bst2$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
@@ -259,9 +296,11 @@ test_that("use of multiple eval metrics works", {


 test_that("training continuation works", {
-  dtrain <- xgb.DMatrix(train$data, label = train$label)
+  dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
   watchlist <- list(train = dtrain)
-  param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
+  param <- list(
+    objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
+  )

   # for the reference, use 4 iterations at once:
   set.seed(11)
@@ -271,30 +310,33 @@ test_that("training continuation works", {
   bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
   # continue for two more:
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
-  if (!windows_flag && !solaris_flag)
+  if (!windows_flag && !solaris_flag) {
     expect_equal(bst$raw, bst2$raw)
+  }
   expect_false(is.null(bst2$evaluation_log))
   expect_equal(dim(bst2$evaluation_log), c(4, 2))
   expect_equal(bst2$evaluation_log, bst$evaluation_log)
   # test continuing from raw model data
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
-  if (!windows_flag && !solaris_flag)
+  if (!windows_flag && !solaris_flag) {
     expect_equal(bst$raw, bst2$raw)
+  }
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
   # test continuing from a model in file
   xgb.save(bst1, "xgboost.json")
   bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.json")
-  if (!windows_flag && !solaris_flag)
+  if (!windows_flag && !solaris_flag) {
     expect_equal(bst$raw, bst2$raw)
+  }
   expect_equal(dim(bst2$evaluation_log), c(2, 2))
   file.remove("xgboost.json")
 })

 test_that("model serialization works", {
   out_path <- "model_serialization"
-  dtrain <- xgb.DMatrix(train$data, label = train$label)
+  dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
   watchlist <- list(train = dtrain)
-  param <- list(objective = "binary:logistic")
+  param <- list(objective = "binary:logistic", nthread = n_threads)
   booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
   raw <- xgb.serialize(booster)
   saveRDS(raw, out_path)
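The continuation hunk above exercises the three forms `xgb_model` accepts: an `xgb.Booster` object, its raw byte vector, and a path to a saved model. A minimal, self-contained sketch of the same pattern (object and file names here are illustrative, not from the test suite):

```r
library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 1)
base <- xgb.train(param, dtrain, nrounds = 2)
# Continue boosting from the booster object, its raw bytes, or a saved file:
c1 <- xgb.train(param, dtrain, nrounds = 2, xgb_model = base)
c2 <- xgb.train(param, dtrain, nrounds = 2, xgb_model = base$raw)
xgb.save(base, "base.json")
c3 <- xgb.train(param, dtrain, nrounds = 2, xgb_model = "base.json")
file.remove("base.json")
```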
@@ -309,11 +351,14 @@ test_that("model serialization works", {
 test_that("xgb.cv works", {
   set.seed(11)
   expect_output(
-    cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5,
-                 eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
-                 eval_metric = "error", verbose = TRUE)
-  , "train-error:")
-  expect_is(cv, 'xgb.cv.synchronous')
+    cv <- xgb.cv(
+      data = train$data, label = train$label, max_depth = 2, nfold = 5,
+      eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+      eval_metric = "error", verbose = TRUE
+    ),
+    "train-error:"
+  )
+  expect_is(cv, "xgb.cv.synchronous")
   expect_false(is.null(cv$evaluation_log))
   expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)
   expect_lt(cv$evaluation_log[, min(test_error_std)], 0.008)
@@ -326,15 +371,19 @@ test_that("xgb.cv works", {
 })

 test_that("xgb.cv works with stratified folds", {
-  dtrain <- xgb.DMatrix(train$data, label = train$label)
+  dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
   set.seed(314159)
-  cv <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
-               eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
-               verbose = TRUE, stratified = FALSE)
+  cv <- xgb.cv(
+    data = dtrain, max_depth = 2, nfold = 5,
+    eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+    verbose = TRUE, stratified = FALSE
+  )
   set.seed(314159)
-  cv2 <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
-                eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
-                verbose = TRUE, stratified = TRUE)
+  cv2 <- xgb.cv(
+    data = dtrain, max_depth = 2, nfold = 5,
+    eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+    verbose = TRUE, stratified = TRUE
+  )
   # Stratified folds should result in different evaluation logs
   expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
 })
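Background on the assertion above: with `stratified = TRUE`, `xgb.cv` assigns rows to folds so that each fold keeps roughly the original label proportions, so the fold assignments (and thus the evaluation logs) differ from the purely random split even under the same seed. A small base-R illustration of why stratification matters — this is not from the test suite:

```r
# Sketch: per-fold class balance drifts under purely random assignment.
set.seed(1)
y <- c(rep(1, 20), rep(0, 80))        # 20% positives overall
folds <- sample(rep(1:5, each = 20))  # random, unstratified fold labels
tapply(y, folds, mean)                # per-fold positive rates scatter around 0.2
```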
@@ -342,40 +391,57 @@ test_that("xgb.cv works with stratified folds", {
 test_that("train and predict with non-strict classes", {
   # standard dense matrix input
   train_dense <- as.matrix(train$data)
-  bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
-                 eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
+  bst <- xgboost(
+    data = train_dense, label = train$label, max_depth = 2,
+    eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+    verbose = 0
+  )
   pr0 <- predict(bst, train_dense)

   # dense matrix-like input of non-matrix class
-  class(train_dense) <- 'shmatrix'
+  class(train_dense) <- "shmatrix"
   expect_true(is.matrix(train_dense))
   expect_error(
-    bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
-                   eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
-  , regexp = NA)
+    bst <- xgboost(
+      data = train_dense, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+      verbose = 0
+    ),
+    regexp = NA
+  )
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)

   # dense matrix-like input of non-matrix class with some inheritance
-  class(train_dense) <- c('pphmatrix', 'shmatrix')
+  class(train_dense) <- c("pphmatrix", "shmatrix")
   expect_true(is.matrix(train_dense))
   expect_error(
-    bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
-                   eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
-  , regexp = NA)
+    bst <- xgboost(
+      data = train_dense, label = train$label, max_depth = 2,
+      eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+      verbose = 0
+    ),
+    regexp = NA
+  )
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)

   # when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster
-  class(bst) <- c('super.Booster', 'xgb.Booster')
+  class(bst) <- c("super.Booster", "xgb.Booster")
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)
 })

 test_that("max_delta_step works", {
-  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+  dtrain <- xgb.DMatrix(
+    agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+  )
   watchlist <- list(train = dtrain)
-  param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
+  param <- list(
+    objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
+    nthread = n_threads,
+    eta = 0.5
+  )
   nrounds <- 5
   # model with no restriction on max_delta_step
   bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
@@ -395,14 +461,16 @@ test_that("colsample_bytree works", {
   test_y <- as.numeric(rowSums(test_x) > 0)
   colnames(train_x) <- paste0("Feature_", sprintf("%03d", 1:100))
   colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
-  dtrain <- xgb.DMatrix(train_x, label = train_y)
-  dtest <- xgb.DMatrix(test_x, label = test_y)
+  dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
+  dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
   watchlist <- list(train = dtrain, eval = dtest)
   ## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
   ## each tree
-  param <- list(max_depth = 2, eta = 0, nthread = 2,
+  param <- list(
+    max_depth = 2, eta = 0, nthread = n_threads,
     colsample_bytree = 0.01, objective = "binary:logistic",
-    eval_metric = "auc")
+    eval_metric = "auc"
+  )
   set.seed(2)
   bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
   xgb.importance(model = bst)
@@ -412,9 +480,11 @@ test_that("colsample_bytree works", {
 })

 test_that("Configuration works", {
-  bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
-                 eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic",
-                 eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
+  bst <- xgboost(
+    data = train$data, label = train$label, max_depth = 2,
+    eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
+    eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
+  )
   config <- xgb.config(bst)
   xgb.config(bst) <- config
   reloaded_config <- xgb.config(bst)
@@ -451,22 +521,26 @@ test_that("strict_shape works", {
     y <- as.numeric(iris$Species) - 1
     X <- as.matrix(iris[, -5])

-    bst <- xgboost(data = X, label = y,
-                   max_depth = 2, nrounds = n_rounds,
-                   objective = "multi:softprob", num_class = 3, eval_metric = "merror")
+    bst <- xgboost(
+      data = X, label = y,
+      max_depth = 2, nrounds = n_rounds, nthread = n_threads,
+      objective = "multi:softprob", num_class = 3, eval_metric = "merror"
+    )

     test_strict_shape(bst, X, 3)
   }


   test_agaricus <- function() {
-    data(agaricus.train, package = 'xgboost')
+    data(agaricus.train, package = "xgboost")
     X <- agaricus.train$data
     y <- agaricus.train$label

-    bst <- xgboost(data = X, label = y, max_depth = 2,
+    bst <- xgboost(
+      data = X, label = y, max_depth = 2, nthread = n_threads,
       nrounds = n_rounds, objective = "binary:logistic",
-      eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss")
+      eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
+    )

     test_strict_shape(bst, X, 1)
   }
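For readers unfamiliar with the helper being exercised here: `predict.xgb.Booster` takes a `strict_shape` option that keeps a fixed array shape in the output, with an explicit class dimension, instead of collapsing to a flat vector. A hedged sketch of what `test_strict_shape` presumably asserts (the helper itself is defined elsewhere in this file; the exact dimension ordering is what it pins down):

```r
library(xgboost)
X <- as.matrix(iris[, -5])
y <- as.numeric(iris$Species) - 1
bst <- xgboost(data = X, label = y, nrounds = 2, nthread = 1,
               objective = "multi:softprob", num_class = 3, verbose = 0)
predt <- predict(bst, X, strict_shape = TRUE)
dim(predt)  # keeps an explicit class dimension of size 3 instead of a flat vector
```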
@@ -481,8 +555,10 @@ test_that("'predict' accepts CSR data", {
   x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix")
   x_csr <- as(x_csc, "RsparseMatrix")
   x_spv <- as(x_csc, "sparseVector")
-  bst <- xgboost(data = X, label = y, objective = "binary:logistic",
-                 nrounds = 5L, verbose = FALSE)
+  bst <- xgboost(
+    data = X, label = y, objective = "binary:logistic",
+    nrounds = 5L, verbose = FALSE, nthread = n_threads
+  )
   p_csc <- predict(bst, x_csc)
   p_csr <- predict(bst, x_csr)
   p_spv <- predict(bst, x_spv)
@@ -6,6 +6,8 @@ data(agaricus.test, package = 'xgboost')
 train <- agaricus.train
 test <- agaricus.test

+n_threads <- 2
+
 # add some label noise for early stopping tests
 add.noise <- function(label, frac) {
   inoise <- sample(length(label), length(label) * frac)
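The pattern introduced here — a single file-level `n_threads` constant threaded through every `xgb.DMatrix` call and parameter list — is what this commit repeats across the test files. A minimal sketch of the convention on illustrative data:

```r
# Sketch: cap CPU use once, then reference the constant everywhere.
library(xgboost)
n_threads <- 2
x <- matrix(rnorm(100), nrow = 20)
y <- rbinom(20, 1, 0.5)
dtrain <- xgb.DMatrix(x, label = y, nthread = n_threads)
param <- list(objective = "binary:logistic", nthread = n_threads)
bst <- xgb.train(param, dtrain, nrounds = 2)
```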
@@ -15,15 +17,15 @@ add.noise <- function(label, frac) {
 set.seed(11)
 ltrain <- add.noise(train$label, 0.2)
 ltest <- add.noise(test$label, 0.2)
-dtrain <- xgb.DMatrix(train$data, label = ltrain)
-dtest <- xgb.DMatrix(test$data, label = ltest)
+dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
+dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
 watchlist <- list(train = dtrain, test = dtest)


 err <- function(label, pr) sum((pr > 0.5) != label) / length(label)

 param <- list(objective = "binary:logistic", eval_metric = "error",
-              max_depth = 2, nthread = 2)
+              max_depth = 2, nthread = n_threads)


 test_that("cb.print.evaluation works as expected", {
@@ -103,7 +105,7 @@ test_that("cb.evaluation.log works as expected", {


 param <- list(objective = "binary:logistic", eval_metric = "error",
-              max_depth = 4, nthread = 2)
+              max_depth = 4, nthread = n_threads)

 test_that("can store evaluation_log without printing", {
   expect_silent(
@@ -179,8 +181,10 @@ test_that("cb.save.model works as expected", {
   expect_true(file.exists('xgboost_01.json'))
   expect_true(file.exists('xgboost_02.json'))
   b1 <- xgb.load('xgboost_01.json')
+  xgb.parameters(b1) <- list(nthread = 2)
   expect_equal(xgb.ntree(b1), 1)
   b2 <- xgb.load('xgboost_02.json')
+  xgb.parameters(b2) <- list(nthread = 2)
   expect_equal(xgb.ntree(b2), 2)

   xgb.config(b2) <- xgb.config(bst)
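A booster loaded from a saved file does not carry the thread setting of the session that trained it, which is why the commit pins `nthread` explicitly after every `xgb.load`. The general pattern, sketched outside the test (file name illustrative):

```r
library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               nrounds = 1, objective = "binary:logistic", nthread = 2, verbose = 0)
xgb.save(bst, "model.json")
loaded <- xgb.load("model.json")
xgb.parameters(loaded) <- list(nthread = 2)  # re-apply the thread cap after loading
file.remove("model.json")
```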
@@ -267,7 +271,8 @@ test_that("early stopping works with titanic", {
     objective = "binary:logistic",
     eval_metric = "auc",
     nrounds = 100,
-    early_stopping_rounds = 3
+    early_stopping_rounds = 3,
+    nthread = n_threads
   )

   expect_true(TRUE) # should not crash
@@ -308,7 +313,7 @@ test_that("prediction in xgb.cv works", {

 test_that("prediction in xgb.cv works for gblinear too", {
   set.seed(11)
-  p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = 2)
+  p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads)
   cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0)
   expect_false(is.null(cv$evaluation_log))
   expect_false(is.null(cv$pred))
@@ -341,7 +346,7 @@ test_that("prediction in xgb.cv for softprob works", {
   set.seed(11)
   expect_warning(
     cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4,
-                 eta = 0.5, nrounds = 5, max_depth = 3, nthread = 2,
+                 eta = 0.5, nrounds = 5, max_depth = 3, nthread = n_threads,
                  subsample = 0.8, gamma = 2, verbose = 0,
                  prediction = TRUE, objective = "multi:softprob", num_class = 3)
   , NA)
@@ -2,10 +2,16 @@ context('Test models with custom objective')

 set.seed(1994)

+n_threads <- 2
+
 data(agaricus.train, package = 'xgboost')
 data(agaricus.test, package = 'xgboost')
-dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+dtrain <- xgb.DMatrix(
+  agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+)
+dtest <- xgb.DMatrix(
+  agaricus.test$data, label = agaricus.test$label, nthread = n_threads
+)
 watchlist <- list(eval = dtest, train = dtrain)

 logregobj <- function(preds, dtrain) {
@@ -22,7 +28,7 @@ evalerror <- function(preds, dtrain) {
   return(list(metric = "error", value = err))
 }

-param <- list(max_depth = 2, eta = 1, nthread = 2,
+param <- list(max_depth = 2, eta = 1, nthread = n_threads,
               objective = logregobj, eval_metric = evalerror)
 num_round <- 2

@@ -67,7 +73,7 @@ test_that("custom objective using DMatrix attr works", {
 test_that("custom objective with multi-class shape", {
   data <- as.matrix(iris[, -5])
   label <- as.numeric(iris$Species) - 1
-  dtrain <- xgb.DMatrix(data = data, label = label)
+  dtrain <- xgb.DMatrix(data = data, label = label, nthread = n_threads)
   n_classes <- 3

   fake_softprob <- function(preds, dtrain) {
@@ -5,19 +5,21 @@ data(agaricus.test, package = "xgboost")
 test_data <- agaricus.test$data[1:100, ]
 test_label <- agaricus.test$label[1:100]

+n_threads <- 2
+
 test_that("xgb.DMatrix: basic construction", {
   # from sparse matrix
-  dtest1 <- xgb.DMatrix(test_data, label = test_label)
+  dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)

   # from dense matrix
-  dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label)
+  dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label, nthread = n_threads)
   expect_equal(getinfo(dtest1, "label"), getinfo(dtest2, "label"))
   expect_equal(dim(dtest1), dim(dtest2))

   # from dense integer matrix
   int_data <- as.matrix(test_data)
   storage.mode(int_data) <- "integer"
-  dtest3 <- xgb.DMatrix(int_data, label = test_label)
+  dtest3 <- xgb.DMatrix(int_data, label = test_label, nthread = n_threads)
   expect_equal(dim(dtest1), dim(dtest3))

   n_samples <- 100
@@ -29,15 +31,15 @@ test_that("xgb.DMatrix: basic construction", {
   X <- matrix(X, nrow = n_samples)
   y <- rbinom(n = n_samples, size = 1, prob = 1 / 2)

-  fd <- xgb.DMatrix(X, label = y, missing = 1)
+  fd <- xgb.DMatrix(X, label = y, missing = 1, nthread = n_threads)

   dgc <- as(X, "dgCMatrix")
-  fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0)
+  fdgc <- xgb.DMatrix(dgc, label = y, missing = 1.0, nthread = n_threads)

   dgr <- as(X, "dgRMatrix")
-  fdgr <- xgb.DMatrix(dgr, label = y, missing = 1)
+  fdgr <- xgb.DMatrix(dgr, label = y, missing = 1, nthread = n_threads)

-  params <- list(tree_method = "hist")
+  params <- list(tree_method = "hist", nthread = n_threads)
   bst_fd <- xgb.train(
     params, nrounds = 8, fd, watchlist = list(train = fd)
   )
@@ -64,12 +66,12 @@ test_that("xgb.DMatrix: NA", {
   )
   x[1, "x1"] <- NA

-  m <- xgb.DMatrix(x)
+  m <- xgb.DMatrix(x, nthread = n_threads)
   xgb.DMatrix.save(m, "int.dmatrix")

   x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
   colnames(x) <- c("x1", "x2")
-  m <- xgb.DMatrix(x)
+  m <- xgb.DMatrix(x, nthread = n_threads)

   xgb.DMatrix.save(m, "float.dmatrix")

@@ -94,7 +96,7 @@ test_that("xgb.DMatrix: NA", {

 test_that("xgb.DMatrix: saving, loading", {
   # save to a local file
-  dtest1 <- xgb.DMatrix(test_data, label = test_label)
+  dtest1 <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
   tmp_file <- tempfile('xgb.DMatrix_')
   on.exit(unlink(tmp_file))
   expect_true(xgb.DMatrix.save(dtest1, tmp_file))
@@ -109,13 +111,17 @@ test_that("xgb.DMatrix: saving, loading", {
   tmp_file <- tempfile(fileext = ".libsvm")
   writeLines(tmp, tmp_file)
   expect_true(file.exists(tmp_file))
-  dtest4 <- xgb.DMatrix(paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE)
+  dtest4 <- xgb.DMatrix(
+    paste(tmp_file, "?format=libsvm", sep = ""), silent = TRUE, nthread = n_threads
+  )
   expect_equal(dim(dtest4), c(3, 4))
   expect_equal(getinfo(dtest4, 'label'), c(0, 1, 0))

   # check that feature info is saved
   data(agaricus.train, package = 'xgboost')
-  dtrain <- xgb.DMatrix(data = agaricus.train$data, label = agaricus.train$label)
+  dtrain <- xgb.DMatrix(
+    data = agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+  )
   cnames <- colnames(dtrain)
   expect_equal(length(cnames), 126)
   tmp_file <- tempfile('xgb.DMatrix_')
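When `xgb.DMatrix` is given a file path rather than an in-memory matrix, the input format is declared with a URI-style suffix, as in the `?format=libsvm` above. A hedged sketch of the round trip (file contents and path are illustrative):

```r
library(xgboost)
# Sketch: write three LIBSVM-format rows, then load them through the URI form.
tmp <- tempfile(fileext = ".libsvm")
writeLines(c("0 1:1 2:2", "1 3:3", "0 1:1 4:4"), tmp)
dm <- xgb.DMatrix(paste0(tmp, "?format=libsvm"), nthread = 1)
dim(dm)  # rows and columns are inferred from the file
unlink(tmp)
```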
@@ -129,7 +135,7 @@ test_that("xgb.DMatrix: saving, loading", {
 })

 test_that("xgb.DMatrix: getinfo & setinfo", {
-  dtest <- xgb.DMatrix(test_data)
+  dtest <- xgb.DMatrix(test_data, nthread = n_threads)
   expect_true(setinfo(dtest, 'label', test_label))
   labels <- getinfo(dtest, 'label')
   expect_equal(test_label, getinfo(dtest, 'label'))
@@ -156,7 +162,7 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
 })

 test_that("xgb.DMatrix: slice, dim", {
-  dtest <- xgb.DMatrix(test_data, label = test_label)
+  dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
   expect_equal(dim(dtest), dim(test_data))
   dsub1 <- slice(dtest, 1:42)
   expect_equal(nrow(dsub1), 42)
@@ -171,16 +177,20 @@ test_that("xgb.DMatrix: slice, trailing empty rows", {
   data(agaricus.train, package = 'xgboost')
   train_data <- agaricus.train$data
   train_label <- agaricus.train$label
-  dtrain <- xgb.DMatrix(data = train_data, label = train_label)
+  dtrain <- xgb.DMatrix(
+    data = train_data, label = train_label, nthread = n_threads
+  )
   slice(dtrain, 6513L)
   train_data[6513, ] <- 0
-  dtrain <- xgb.DMatrix(data = train_data, label = train_label)
+  dtrain <- xgb.DMatrix(
+    data = train_data, label = train_label, nthread = n_threads
+  )
   slice(dtrain, 6513L)
   expect_equal(nrow(dtrain), 6513)
 })

 test_that("xgb.DMatrix: colnames", {
-  dtest <- xgb.DMatrix(test_data, label = test_label)
+  dtest <- xgb.DMatrix(test_data, label = test_label, nthread = n_threads)
   expect_equal(colnames(dtest), colnames(test_data))
   expect_error(colnames(dtest) <- 'asdf')
   new_names <- make.names(seq_len(ncol(test_data)))
@@ -196,7 +206,7 @@ test_that("xgb.DMatrix: nrow is correct for a very sparse matrix", {
   x <- Matrix::rsparsematrix(nr, 100, density = 0.0005)
   # we want it very sparse, so that last rows are empty
   expect_lt(max(x@i), nr)
-  dtest <- xgb.DMatrix(x)
+  dtest <- xgb.DMatrix(x, nthread = n_threads)
   expect_equal(dim(dtest), dim(x))
 })

@@ -205,8 +215,8 @@ test_that("xgb.DMatrix: print", {

   # core DMatrix with just data and labels
   dtrain <- xgb.DMatrix(
-    data = agaricus.train$data
-    , label = agaricus.train$label
+    data = agaricus.train$data, label = agaricus.train$label,
+    nthread = n_threads
   )
   txt <- capture.output({
     print(dtrain)
@@ -222,10 +232,11 @@ test_that("xgb.DMatrix: print", {

   # DMatrix with weights and base_margin
   dtrain <- xgb.DMatrix(
-    data = agaricus.train$data
-    , label = agaricus.train$label
-    , weight = seq_along(agaricus.train$label)
-    , base_margin = agaricus.train$label
+    data = agaricus.train$data,
+    label = agaricus.train$label,
+    weight = seq_along(agaricus.train$label),
+    base_margin = agaricus.train$label,
+    nthread = n_threads
   )
   txt <- capture.output({
     print(dtrain)
@@ -234,7 +245,8 @@ test_that("xgb.DMatrix: print", {

   # DMatrix with just features
   dtrain <- xgb.DMatrix(
-    data = agaricus.train$data
+    data = agaricus.train$data,
+    nthread = n_threads
   )
   txt <- capture.output({
     print(dtrain)
@@ -245,7 +257,8 @@ test_that("xgb.DMatrix: print", {
   data_no_colnames <- agaricus.train$data
   colnames(data_no_colnames) <- NULL
   dtrain <- xgb.DMatrix(
-    data = data_no_colnames
+    data = data_no_colnames,
+    nthread = n_threads
   )
   txt <- capture.output({
     print(dtrain)
@@ -1,5 +1,7 @@
 context("feature weights")

+n_threads <- 2
+
 test_that("training with feature weights works", {
   nrows <- 1000
   ncols <- 9
@@ -10,8 +12,12 @@ test_that("training with feature weights works", {

   test <- function(tm) {
     names <- paste0("f", 1:ncols)
-    xy <- xgb.DMatrix(data = x, label = y, feature_weights = weights)
-    params <- list(colsample_bynode = 0.4, tree_method = tm, nthread = 1)
+    xy <- xgb.DMatrix(
+      data = x, label = y, feature_weights = weights, nthread = n_threads
+    )
+    params <- list(
+      colsample_bynode = 0.4, tree_method = tm, nthread = n_threads
+    )
     model <- xgb.train(params = params, data = xy, nrounds = 32)
     importance <- xgb.importance(model = model, feature_names = names)
     expect_equal(dim(importance), c(ncols, 4))
@@ -1,13 +1,19 @@
 context('Test generalized linear models')

+n_threads <- 2
+
 test_that("gblinear works", {
   data(agaricus.train, package = 'xgboost')
   data(agaricus.test, package = 'xgboost')
-  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-  dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+  dtrain <- xgb.DMatrix(
+    agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+  )
+  dtest <- xgb.DMatrix(
+    agaricus.test$data, label = agaricus.test$label, nthread = n_threads
+  )

   param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
-                nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
+                nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
   watchlist <- list(eval = dtest, train = dtrain)

   n <- 5 # iterations
@@ -48,12 +54,16 @@ test_that("gblinear works", {
 test_that("gblinear early stopping works", {
   data(agaricus.train, package = 'xgboost')
   data(agaricus.test, package = 'xgboost')
-  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-  dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+  dtrain <- xgb.DMatrix(
+    agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+  )
+  dtest <- xgb.DMatrix(
+    agaricus.test$data, label = agaricus.test$label, nthread = n_threads
+  )

   param <- list(
     objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
-    nthread = 2, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
+    nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001,
     updater = "coord_descent"
   )

@@ -171,6 +171,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
   fit <- xgboost(
     params = c(
       list(
+        nthread = 2,
         booster = booster,
         objective = "reg:squarederror",
         eval_metric = "rmse"),
@@ -257,7 +258,7 @@ test_that("xgb.Booster serializing as R object works", {
   .skip_if_vcd_not_available()
   saveRDS(bst.Tree, 'xgb.model.rds')
   bst <- readRDS('xgb.model.rds')
-  dtrain <- xgb.DMatrix(sparse_matrix, label = label)
+  dtrain <- xgb.DMatrix(sparse_matrix, label = label, nthread = 2)
   expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
   expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
   xgb.save(bst, 'xgb.model')
@@ -363,7 +364,8 @@ test_that("xgb.importance works with and without feature names", {
     data = as.matrix(data.frame(x = c(0, 1))),
     label = c(1, 2),
     nrounds = 1,
-    base_score = 0.5
+    base_score = 0.5,
+    nthread = 2
   )
   df <- xgb.model.dt.tree(model = m)
   expect_equal(df$Feature, "Leaf")
@@ -2,6 +2,8 @@ require(xgboost)

 context("interaction constraints")

+n_threads <- 2
+
 set.seed(1024)
 x1 <- rnorm(1000, 1)
 x2 <- rnorm(1000, 1)
@@ -45,11 +47,18 @@ test_that("interaction constraints scientific representation", {
   d <- matrix(rexp(rows, rate = .1), nrow = rows, ncol = cols)
   y <- rnorm(rows)

-  dtrain <- xgb.DMatrix(data = d, info = list(label = y))
+  dtrain <- xgb.DMatrix(data = d, info = list(label = y), nthread = n_threads)
   inc <- list(c(seq.int(from = 0, to = cols, by = 1)))

-  with_inc <- xgb.train(data = dtrain, tree_method = 'hist',
-                        interaction_constraints = inc, nrounds = 10)
-  without_inc <- xgb.train(data = dtrain, tree_method = 'hist', nrounds = 10)
+  with_inc <- xgb.train(
+    data = dtrain,
+    tree_method = 'hist',
+    interaction_constraints = inc,
+    nrounds = 10,
+    nthread = n_threads
+  )
+  without_inc <- xgb.train(
+    data = dtrain, tree_method = 'hist', nrounds = 10, nthread = n_threads
+  )
   expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))
 })
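The assertion above rests on a simple identity: a single constraint group that spans every feature index permits all interactions, so the constrained and unconstrained boosters serialize to identical bytes. For contrast, a hedged sketch of a constraint that actually bites (data and parameters illustrative; feature indices are zero-based):

```r
library(xgboost)
set.seed(1)
d <- matrix(rnorm(400), ncol = 4)
y <- rnorm(100)
dtrain <- xgb.DMatrix(data = d, label = y, nthread = 1)
# Only features {0, 1} may share a tree path, and only {2, 3} may:
constrained <- xgb.train(
  data = dtrain, tree_method = "hist", nrounds = 5, nthread = 1,
  interaction_constraints = list(c(0, 1), c(2, 3))
)
```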
@@ -1,6 +1,7 @@
 context('Test prediction of feature interactions')

 set.seed(123)
+n_threads <- 2

 test_that("predict feature interactions works", {
   # simulate some binary data and a linear outcome with an interaction term
@@ -19,8 +20,10 @@ test_that("predict feature interactions works", {

   y <- f_int(X)

-  dm <- xgb.DMatrix(X, label = y)
-  param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
+  dm <- xgb.DMatrix(X, label = y, nthread = n_threads)
+  param <- list(
+    eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = n_threads
+  )
   b <- xgb.train(param, dm, 100)

   pred <- predict(b, dm, outputmargin = TRUE)
@@ -99,11 +102,13 @@ test_that("SHAP contribution values are not NAN", {
     verbose = 0,
     params = list(
       objective = "reg:squarederror",
-      eval_metric = "rmse"),
+      eval_metric = "rmse",
+      nthread = n_threads
+    ),
     data = as.matrix(subset(d, fold == 2)[, ivs]),
     label = subset(d, fold == 2)$y,
-    nthread = 1,
-    nrounds = 3)
+    nrounds = 3
+  )

   shaps <- as.data.frame(predict(fit,
     newdata = as.matrix(subset(d, fold == 1)[, ivs]),
@@ -116,8 +121,12 @@ test_that("SHAP contribution values are not NAN", {


 test_that("multiclass feature interactions work", {
-  dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
-  param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
+  dm <- xgb.DMatrix(
+    as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
+  )
+  param <- list(
+    eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3, nthread = n_threads
+  )
   b <- xgb.train(param, dm, 40)
   pred <- t(
     array(
@@ -166,6 +175,7 @@ test_that("SHAP single sample works", {
     max_depth = 2,
     nrounds = 4,
     objective = "binary:logistic",
+    nthread = n_threads
   )

   predt <- predict(
|
|||||||
nrounds <- 8
|
nrounds <- 8
|
||||||
booster <- xgboost(
|
booster <- xgboost(
|
||||||
data = train$data, label = train$label,
|
data = train$data, label = train$label,
|
||||||
nrounds = nrounds, objective = "binary:logistic"
|
nrounds = nrounds, objective = "binary:logistic",
|
||||||
|
nthread = 2
|
||||||
)
|
)
|
||||||
|
|
||||||
json_bytes <- xgb.save.raw(booster, raw_format = "json")
|
json_bytes <- xgb.save.raw(booster, raw_format = "json")
|
||||||
|
|||||||
@ -66,7 +66,7 @@ test_that("Models from previous versions of XGBoost can be loaded", {
|
|||||||
unzip(zipfile, exdir = extract_dir, overwrite = TRUE)
|
unzip(zipfile, exdir = extract_dir, overwrite = TRUE)
|
||||||
model_dir <- file.path(extract_dir, 'models')
|
model_dir <- file.path(extract_dir, 'models')
|
||||||
|
|
||||||
pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4))
|
pred_data <- xgb.DMatrix(matrix(c(0, 0, 0, 0), nrow = 1, ncol = 4), nthread = 2)
|
||||||
|
|
||||||
lapply(list.files(model_dir), function (x) {
|
lapply(list.files(model_dir), function (x) {
|
||||||
model_file <- file.path(model_dir, x)
|
model_file <- file.path(model_dir, x)
|
||||||
@ -87,6 +87,7 @@ test_that("Models from previous versions of XGBoost can be loaded", {
|
|||||||
booster <- readRDS(model_file)
|
booster <- readRDS(model_file)
|
||||||
} else {
|
} else {
|
||||||
booster <- xgb.load(model_file)
|
booster <- xgb.load(model_file)
|
||||||
|
xgb.parameters(booster) <- list(nthread = 2)
|
||||||
}
|
}
|
||||||
predict(booster, newdata = pred_data)
|
predict(booster, newdata = pred_data)
|
||||||
run_booster_check(booster, name)
|
run_booster_check(booster, name)
|
||||||
|
|||||||
@@ -3,8 +3,12 @@ context('Test model params and call are exposed to R')
 data(agaricus.train, package = 'xgboost')
 data(agaricus.test, package = 'xgboost')

-dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+dtrain <- xgb.DMatrix(
+  agaricus.train$data, label = agaricus.train$label, nthread = 2
+)
+dtest <- xgb.DMatrix(
+  agaricus.test$data, label = agaricus.test$label, nthread = 2
+)

 bst <- xgboost(data = dtrain,
                max_depth = 2,
@@ -4,8 +4,10 @@ set.seed(1994)

 test_that("Poisson regression works", {
   data(mtcars)
-  bst <- xgboost(data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
-                 objective = 'count:poisson', nrounds = 10, verbose = 0)
+  bst <- xgboost(
+    data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
+    objective = 'count:poisson', nrounds = 10, verbose = 0, nthread = 2
+  )
   expect_equal(class(bst), "xgb.Booster")
   pred <- predict(bst, as.matrix(mtcars[, -11]))
   expect_equal(length(pred), 32)
@@ -1,5 +1,7 @@
 context('Learning to rank')

+n_threads <- 2
+
 test_that('Test ranking with unweighted data', {
   X <- Matrix::sparseMatrix(
     i = c(2, 3, 7, 9, 12, 15, 17, 18)
@@ -9,10 +11,10 @@ test_that('Test ranking with unweighted data', {
   )
   y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
   group <- c(5, 5, 5, 5)
-  dtrain <- xgb.DMatrix(X, label = y, group = group)
+  dtrain <- xgb.DMatrix(X, label = y, group = group, nthread = n_threads)

   params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
-                 eval_metric = 'auc', eval_metric = 'aucpr')
+                 eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
   bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
   # Check if the metric is monotone increasing
   expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
@@ -29,10 +31,14 @@ test_that('Test ranking with weighted data', {
   y <- c(0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0)
   group <- c(5, 5, 5, 5)
   weight <- c(1.0, 2.0, 3.0, 4.0)
-  dtrain <- xgb.DMatrix(X, label = y, group = group, weight = weight)
+  dtrain <- xgb.DMatrix(
+    X, label = y, group = group, weight = weight, nthread = n_threads
+  )

-  params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
-                 eval_metric = 'auc', eval_metric = 'aucpr')
+  params <- list(
+    eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
+    eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
+  )
   bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
   # Check if the metric is monotone increasing
   expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
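In these ranking tests, `group` partitions the 20 rows into four query groups of five documents each, and `weight` carries one value per group (not per row) to scale each group's contribution to the pairwise loss. A hedged sketch of the bookkeeping the DMatrix expects:

```r
# Sketch: group sizes must sum to the number of rows in the DMatrix.
group <- c(5, 5, 5, 5)
stopifnot(sum(group) == 20)      # 20 labelled documents in total
weight <- c(1.0, 2.0, 3.0, 4.0)  # one weight per query group
stopifnot(length(weight) == length(group))
```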
@@ -16,6 +16,7 @@ test_that("Can save and load models with Unicode paths", {
     path <- file.path(tmpdir, x)
     xgb.save(bst, path)
     bst2 <- xgb.load(path)
+    xgb.parameters(bst2) <- list(nthread = 2)
     expect_equal(predict(bst, test$data), predict(bst2, test$data))
   })
 })
@@ -2,8 +2,15 @@ context("update trees in an existing model")

 data(agaricus.train, package = 'xgboost')
 data(agaricus.test, package = 'xgboost')
-dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
-dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+
+n_threads <- 1
+
+dtrain <- xgb.DMatrix(
+  agaricus.train$data, label = agaricus.train$label, nthread = n_threads
+)
+dtest <- xgb.DMatrix(
+  agaricus.test$data, label = agaricus.test$label, nthread = n_threads
+)

 # Disable flaky tests for 32-bit Windows.
 # See https://github.com/dmlc/xgboost/issues/3720
@@ -14,7 +21,7 @@ test_that("updating the model works", {

   # no-subsampling
   p1 <- list(
-    objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2,
+    objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = n_threads,
     updater = "grow_colmaker,prune"
   )
   set.seed(11)
@@ -86,9 +93,11 @@ test_that("updating the model works", {
 })

 test_that("updating works for multiclass & multitree", {
-  dtr <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
+  dtr <- xgb.DMatrix(
+    as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
+  )
   watchlist <- list(train = dtr)
-  p0 <- list(max_depth = 2, eta = 0.5, nthread = 2, subsample = 0.6,
+  p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
              objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
              base_score = 0)
   set.seed(121)
|
|||||||
if (!require('vcd')) {
|
if (!require('vcd')) {
|
||||||
install.packages('vcd')
|
install.packages('vcd')
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data.table::setDTthreads(2)
|
||||||
```
|
```
|
||||||
|
|
||||||
> **VCD** package is used for one of its embedded dataset only.
|
> **VCD** package is used for one of its embedded dataset only.
|
||||||
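Worth noting: `nthread` only caps XGBoost's own thread pool, while `data.table`, which this vignette uses for feature engineering, schedules its threads independently — hence the separate `data.table::setDTthreads(2)` call above. Usage is one line near the top of a session:

```r
library(data.table)
setDTthreads(2)  # limit data.table to two threads
getDTthreads()   # confirm the cap
```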
@@ -297,23 +299,25 @@ test <- agaricus.test

 #Random Forest - 1000 trees
 bst <- xgboost(
-  data = train$data
-  , label = train$label
-  , max_depth = 4
-  , num_parallel_tree = 1000
-  , subsample = 0.5
-  , colsample_bytree = 0.5
-  , nrounds = 1
-  , objective = "binary:logistic"
+  data = train$data,
+  label = train$label,
+  max_depth = 4,
+  num_parallel_tree = 1000,
+  subsample = 0.5,
+  colsample_bytree = 0.5,
+  nrounds = 1,
+  objective = "binary:logistic",
+  nthread = 2
 )

 #Boosting - 3 rounds
 bst <- xgboost(
-  data = train$data
-  , label = train$label
-  , max_depth = 4
-  , nrounds = 3
-  , objective = "binary:logistic"
+  data = train$data,
+  label = train$label,
+  max_depth = 4,
+  nrounds = 3,
+  objective = "binary:logistic",
+  nthread = 2
 )
 ```

|
|||||||
train <- agaricus.train
|
train <- agaricus.train
|
||||||
test <- agaricus.test
|
test <- agaricus.test
|
||||||
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
|
bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1,
|
||||||
nrounds = 2, objective = "binary:logistic")
|
nrounds = 2, objective = "binary:logistic", nthread = 2)
|
||||||
xgb.save(bst, 'model.save')
|
xgb.save(bst, 'model.save')
|
||||||
bst = xgb.load('model.save')
|
bst = xgb.load('model.save')
|
||||||
|
xgb.parameters(bst) <- list(nthread = 2)
|
||||||
pred <- predict(bst, test$data)
|
pred <- predict(bst, test$data)
|
||||||
@
|
@
|
||||||
|
|
||||||
@@ -127,7 +128,7 @@ training from initial prediction value, weighted training instance.

 We can use \verb@xgb.DMatrix@ to construct an \verb@xgb.DMatrix@ object:
 <<xgb.DMatrix>>=
-dtrain <- xgb.DMatrix(train$data, label = train$label)
+dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = 2)
 class(dtrain)
 head(getinfo(dtrain,'label'))
 @
@@ -161,9 +162,9 @@ evalerror <- function(preds, dtrain) {
   return(list(metric = "MSE", value = err))
 }
 
-dtest <- xgb.DMatrix(test$data, label = test$label)
+dtest <- xgb.DMatrix(test$data, label = test$label, nthread = 2)
 watchlist <- list(eval = dtest, train = dtrain)
-param <- list(max_depth = 2, eta = 1)
+param <- list(max_depth = 2, eta = 1, nthread = 2)
 
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
 @
@@ -173,13 +173,13 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R** matrix.
 
 ```{r trainingDense, message=F, warning=F}
 bstDense <- xgboost(
-  data = as.matrix(train$data)
-  , label = train$label
-  , max_depth = 2
-  , eta = 1
-  , nthread = 2
-  , nrounds = 2
-  , objective = "binary:logistic"
+  data = as.matrix(train$data),
+  label = train$label,
+  max_depth = 2,
+  eta = 1,
+  nthread = 2,
+  nrounds = 2,
+  objective = "binary:logistic"
 )
 ```
 
@@ -188,14 +188,14 @@ bstDense <- xgboost(
 **XGBoost** offers a way to group them in a `xgb.DMatrix`. You can even add other metadata to it. It will be useful for the most advanced features we will discover later.
 
 ```{r trainingDmatrix, message=F, warning=F}
-dtrain <- xgb.DMatrix(data = train$data, label = train$label)
+dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
 bstDMatrix <- xgboost(
-  data = dtrain
-  , max_depth = 2
-  , eta = 1
-  , nthread = 2
-  , nrounds = 2
-  , objective = "binary:logistic"
+  data = dtrain,
+  max_depth = 2,
+  eta = 1,
+  nthread = 2,
+  nrounds = 2,
+  objective = "binary:logistic"
 )
 ```
 
@@ -314,8 +314,8 @@ Most of the features below have been implemented to help you to improve your model.
 For the following advanced features, we need to put data in `xgb.DMatrix` as explained above.
 
 ```{r DMatrix, message=F, warning=F}
-dtrain <- xgb.DMatrix(data = train$data, label = train$label)
-dtest <- xgb.DMatrix(data = test$data, label = test$label)
+dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
+dtest <- xgb.DMatrix(data = test$data, label = test$label, nthread = 2)
 ```
 
 ### Measure learning progress with xgb.train
@@ -476,6 +476,7 @@ An interesting test to see how identical our saved model is to the original one
 ```{r loadModel, message=F, warning=F}
 # load binary model to R
 bst2 <- xgb.load("xgboost.model")
+xgb.parameters(bst2) <- list(nthread = 2)
 pred2 <- predict(bst2, test$data)
 
 # And now the test
@@ -500,6 +501,7 @@ print(class(rawVec))
 
 # load binary model to R
 bst3 <- xgb.load(rawVec)
+xgb.parameters(bst3) <- list(nthread = 2)
 pred3 <- predict(bst3, test$data)
 
 # pred2 should be identical to pred
@@ -80,6 +80,24 @@ R package versioning
 ====================
 See :ref:`release`.
 
+Testing R package with different compilers
+==========================================
+
+You can change the default compiler used by R by editing the configuration file in your
+home directory. For instance, to test XGBoost built with clang++ instead of g++ on
+Linux, put the following in your ``~/.R/Makevars`` file:
+
+.. code-block:: sh
+
+  CC=clang-15
+  CXX17=clang++-15
+
+Be aware that the variable name should match the name used by ``R CMD``:
+
+.. code-block:: sh
+
+  R CMD config CXX17
+
 Registering native routines in R
 ================================
 According to `R extension manual <https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Registering-native-routines>`_,
@@ -50,6 +50,7 @@ inline void EllpackPageSource::Fetch() {
   // silence the warning about unused variables.
   (void)(row_stride_);
   (void)(is_dense_);
+  (void)(device_);
   common::AssertGPUSupport();
 }
 #endif // !defined(XGBOOST_USE_CUDA)
@@ -106,14 +106,30 @@ void GBTreeModel::Load(dmlc::Stream* fi) {
   Validate(*this);
 }
 
+namespace {
+std::int32_t IOThreads(Context const* ctx) {
+  CHECK(ctx);
+  std::int32_t n_threads = ctx->Threads();
+  // CRAN checks the number of threads used by examples, but we might not have the right
+  // number of threads when serializing/unserializing models, as nthread is a booster
+  // parameter, which is only effective after booster initialization.
+  //
+  // The threshold ratio of CPU time to elapsed time for R is 2.5, so we set the number
+  // of threads to 2.
+#if defined(XGBOOST_STRICT_R_MODE) && XGBOOST_STRICT_R_MODE == 1
+  n_threads = std::min(2, n_threads);
+#endif
+  return n_threads;
+}
+}  // namespace
+
 void GBTreeModel::SaveModel(Json* p_out) const {
   auto& out = *p_out;
   CHECK_EQ(param.num_trees, static_cast<int>(trees.size()));
   out["gbtree_model_param"] = ToJson(param);
   std::vector<Json> trees_json(trees.size());
 
-  CHECK(ctx_);
-  common::ParallelFor(trees.size(), ctx_->Threads(), [&](auto t) {
+  common::ParallelFor(trees.size(), IOThreads(ctx_), [&](auto t) {
     auto const& tree = trees[t];
     Json jtree{Object{}};
     tree->SaveModel(&jtree);
@@ -151,9 +167,7 @@ void GBTreeModel::LoadModel(Json const& in) {
   CHECK_EQ(tree_info_json.size(), param.num_trees);
   tree_info.resize(param.num_trees);
 
-  CHECK(ctx_);
-  common::ParallelFor(param.num_trees, ctx_->Threads(), [&](auto t) {
+  common::ParallelFor(param.num_trees, IOThreads(ctx_), [&](auto t) {
     auto tree_id = get<Integer const>(trees_json[t]["id"]);
     trees.at(tree_id).reset(new RegTree{});
     trees[tree_id]->LoadModel(trees_json[t]);
@@ -3,9 +3,15 @@ import argparse
 import os
 import shutil
 import subprocess
+from io import StringIO
 from pathlib import Path
 from platform import system
 
+try:
+    import pandas as pd
+except ImportError:
+    pd = None
+
 from test_utils import R_PACKAGE, ROOT, DirectoryExcursion, cd, print_time, record_time
 
 
@@ -97,16 +103,47 @@ def build_rpackage(path: str) -> str:
     return tarball
 
 
+def check_example_timing(rcheck_dir: Path, threshold: float) -> None:
+    with open(rcheck_dir / "xgboost-Ex.timings", "r") as fd:
+        timings = fd.readlines()
+    newlines = []
+    for line in timings:
+        line = line.strip()
+        newlines.append(line)
+    con_timings = "\n".join(newlines)
+    df = pd.read_csv(StringIO(con_timings), delimiter="\t")
+    ratio_n = "user/elapsed"
+    df[ratio_n] = df["user"] / df["elapsed"]
+    offending = df[df[ratio_n] > threshold]
+
+    try:
+        # requires the tabulate package
+        df.to_markdown("timings.md")
+        offending.to_markdown("offending.md")
+    except ImportError:
+        print("failed to export markdown files.")
+        pass
+
+    if offending.shape[0] == 0:
+        return
+
+    print(offending)
+    raise ValueError("There are examples using too many threads")
+
+
 @cd(ROOT)
 @record_time
 def check_rpackage(path: str) -> None:
     env = os.environ.copy()
     print("Ncpus:", f"{os.cpu_count()}")
+    threshold = 2.5
     env.update(
         {
             "MAKEFLAGS": f"-j{os.cpu_count()}",
             # cran specific environment variables
-            "_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(2.5),
+            "_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
+            "_R_CHECK_TEST_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
+            "_R_CHECK_VIGNETTE_TIMING_CPU_TO_ELAPSED_THRESHOLD_": str(threshold),
         }
     )
 
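The ratio check above can be reproduced interactively. A minimal sketch in R (not part of the diff), assuming an `xgboost.Rcheck` directory produced by `R CMD check --timings`; the `user` and `elapsed` column names come from the timings file parsed above:

```r
## Read the tab-separated timings written by R CMD check --timings.
timings <- read.delim("xgboost.Rcheck/xgboost-Ex.timings")

## CRAN flags examples whose CPU time exceeds 2.5x the elapsed (wall) time,
## which usually indicates more threads were used than intended.
timings$ratio <- timings$user / timings$elapsed
timings[timings$ratio > 2.5, ]
```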
@@ -118,11 +155,14 @@ def check_rpackage(path: str) -> None:
         CC = os.path.join(mingw_bin, "gcc.exe")
         env.update({"CC": CC, "CXX": CXX})
 
-    status = subprocess.run([R, "CMD", "check", "--as-cran", path], env=env)
-    with open(Path("xgboost.Rcheck") / "00check.log", "r") as fd:
+    status = subprocess.run(
+        [R, "CMD", "check", "--as-cran", "--timings", path], env=env
+    )
+    rcheck_dir = Path("xgboost.Rcheck")
+    with open(rcheck_dir / "00check.log", "r") as fd:
         check_log = fd.read()
 
-    with open(Path("xgboost.Rcheck") / "00install.out", "r") as fd:
+    with open(rcheck_dir / "00install.out", "r") as fd:
         install_log = fd.read()
 
     msg = f"""
@@ -144,6 +184,8 @@ def check_rpackage(path: str) -> None:
     if check_log.find("Examples with CPU time") != -1:
         print(msg)
         raise ValueError("Suspicious NOTE.")
+    if pd is not None:
+        check_example_timing(rcheck_dir, threshold)
 
 
 @cd(R_PACKAGE)
@@ -264,6 +306,8 @@ def main(args: argparse.Namespace) -> None:
             test_with_autotools()
         else:
             test_with_cmake(args)
+    elif args.task == "timings":
+        check_example_timing(Path("xgboost.Rcheck"), 2.5)
     else:
         raise ValueError("Unexpected task.")
 
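With the new `timings` choice registered below, the timing check can also be re-run on its own against an existing `xgboost.Rcheck` directory. A hedged sketch (the helper script's file name is assumed here):

```r
## Equivalent to running `python test_r_package.py --task timings` in a shell.
system2("python", c("test_r_package.py", "--task", "timings"))
```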
@@ -279,7 +323,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "--task",
         type=str,
-        choices=["pack", "build", "check", "doc"],
+        choices=["pack", "build", "check", "doc", "timings"],
         default="check",
         required=False,
     )