[R] Redesigned xgboost() interface skeleton (#10456)
--------- Co-authored-by: Michael Mayer <mayermichael79@gmail.com>
This commit is contained in:
@@ -30,6 +30,40 @@ NVL <- function(x, val) {
|
||||
return(c('rank:pairwise', 'rank:ndcg', 'rank:map'))
|
||||
}
|
||||
|
||||
# Objective names grouped under "non-default mode".
# NOTE(review): what "non-default mode" implies is decided by the callers of
# this helper, which are not visible here - confirm before extending the list.
# Returns a character vector with three objective names.
.OBJECTIVES_NON_DEFAULT_MODE <- function() {
  return(c("reg:logistic", "binary:logitraw", "multi:softmax"))
}
|
||||
|
||||
# Objective names listed as binary classification objectives.
# Returns a character vector with two objective names.
.BINARY_CLASSIF_OBJECTIVES <- function() {
  return(c("binary:logistic", "binary:hinge"))
}
|
||||
|
||||
# Objective names listed as multi-class classification objectives.
# Returns a length-one character vector ("multi:softprob").
.MULTICLASS_CLASSIF_OBJECTIVES <- function() {
  return("multi:softprob")
}
|
||||
|
||||
# Objective names listed for survival tasks with right censoring.
# Returns a character vector with two objective names.
.SURVIVAL_RIGHT_CENSORING_OBJECTIVES <- function() { # nolint
  return(c("survival:cox", "survival:aft"))
}
|
||||
|
||||
# Objective names listed for survival tasks with any kind of censoring.
# Returns a length-one character vector ("survival:aft").
.SURVIVAL_ALL_CENSORING_OBJECTIVES <- function() { # nolint
  return("survival:aft")
}
|
||||
|
||||
# Objective names listed as regression objectives.
# Returns a character vector with nine objective names.
.REGRESSION_OBJECTIVES <- function() {
  return(c(
    "reg:squarederror", "reg:squaredlogerror", "reg:logistic", "reg:pseudohubererror",
    "reg:absoluteerror", "reg:quantileerror", "count:poisson", "reg:gamma", "reg:tweedie"
  ))
}
|
||||
|
||||
# Objective names listed as multi-target objectives.
# Returns a character vector with six objective names, all of the "reg:" family.
.MULTI_TARGET_OBJECTIVES <- function() {
  return(c(
    "reg:squarederror", "reg:squaredlogerror", "reg:logistic", "reg:pseudohubererror",
    "reg:quantileerror", "reg:gamma"
  ))
}
|
||||
|
||||
|
||||
#
|
||||
# Low-level functions for boosting --------------------------------------------
|
||||
|
||||
@@ -663,9 +663,8 @@ validate.features <- function(bst, newdata) {
|
||||
#' data(agaricus.train, package = "xgboost")
|
||||
#' train <- agaricus.train
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' data = train$data,
|
||||
#' label = train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(train$data, label = train$label),
|
||||
#' max_depth = 2,
|
||||
#' eta = 1,
|
||||
#' nthread = 2,
|
||||
@@ -767,9 +766,8 @@ xgb.attributes <- function(object) {
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#' train <- agaricus.train
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' data = train$data,
|
||||
#' label = train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(train$data, label = train$label),
|
||||
#' max_depth = 2,
|
||||
#' eta = 1,
|
||||
#' nthread = nthread,
|
||||
@@ -817,9 +815,8 @@ xgb.config <- function(object) {
|
||||
#' data(agaricus.train, package = "xgboost")
|
||||
#' train <- agaricus.train
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' data = train$data,
|
||||
#' label = train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(train$data, label = train$label),
|
||||
#' max_depth = 2,
|
||||
#' eta = 1,
|
||||
#' nthread = 2,
|
||||
@@ -1230,9 +1227,8 @@ xgb.is.same.Booster <- function(obj1, obj2) {
|
||||
#' data(agaricus.train, package = "xgboost")
|
||||
#' train <- agaricus.train
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' data = train$data,
|
||||
#' label = train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(train$data, label = train$label),
|
||||
#' max_depth = 2,
|
||||
#' eta = 1,
|
||||
#' nthread = 2,
|
||||
|
||||
@@ -853,36 +853,6 @@ xgb.DMatrix.hasinfo <- function(object, info) {
|
||||
}
|
||||
|
||||
|
||||
# Internal helper: obtain an xgb.DMatrix from `data` (and `label`, where applicable).
#
# Arguments:
#   data    - one of:
#               * a base R matrix or an object inheriting from "dgCMatrix":
#                 converted with xgb.DMatrix();
#               * a character vector: treated as a file path, expanded with
#                 path.expand(); only the first element is used;
#               * an object inheriting from "xgb.DMatrix": returned unchanged.
#   label   - required when `data` is a matrix; for any other input a non-NULL
#             label triggers a warning and is not used.
#   missing - forwarded to xgb.DMatrix() for matrix input only.
#   weight  - when non-NULL and `data` is a matrix, attached through
#             setinfo(dtrain, "weight", weight); not used for other inputs.
#   nthread - forwarded to xgb.DMatrix() for matrix input only.
#
# Returns the resulting DMatrix object.
#
# Errors when `data` is a matrix and `label` is NULL, when `data` is a
# data.frame, and when `data` is of any other unsupported type.
xgb.get.DMatrix <- function(data, label, missing, weight, nthread) {
  if (inherits(data, "dgCMatrix") || is.matrix(data)) {
    if (is.null(label)) {
      stop("label must be provided when data is a matrix")
    }
    dtrain <- xgb.DMatrix(data, label = label, missing = missing, nthread = nthread)
    if (!is.null(weight)) {
      setinfo(dtrain, "weight", weight)
    }
  } else {
    # The label warning is raised before the input type is validated, so an
    # unsupported input with a label produces a warning followed by an error.
    if (!is.null(label)) {
      warning("xgboost: label will be ignored.")
    }
    if (is.character(data)) {
      data <- path.expand(data)
      dtrain <- xgb.DMatrix(data[1])
    } else if (inherits(data, "xgb.DMatrix")) {
      dtrain <- data
    } else if (inherits(data, "data.frame")) {
      stop("xgboost doesn't support data.frame as input. Convert it to matrix first.")
    } else {
      stop("xgboost: invalid input data")
    }
  }
  return(dtrain)
}
|
||||
|
||||
|
||||
#' Dimensions of xgb.DMatrix
|
||||
#'
|
||||
#' Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}.
|
||||
|
||||
@@ -29,8 +29,8 @@
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' test <- agaricus.test
|
||||
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
|
||||
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
|
||||
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
|
||||
#' # save the model in file 'model.dump' inside a temporary directory
|
||||
#' dump_path = file.path(tempdir(), 'model.dump')
|
||||
#' xgb.dump(bst, dump_path, with_stats = TRUE)
|
||||
|
||||
@@ -46,9 +46,8 @@
|
||||
#' # binomial classification using "gbtree":
|
||||
#' data(agaricus.train, package = "xgboost")
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' data = agaricus.train$data,
|
||||
#' label = agaricus.train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
|
||||
#' max_depth = 2,
|
||||
#' eta = 1,
|
||||
#' nthread = 2,
|
||||
@@ -59,9 +58,8 @@
|
||||
#' xgb.importance(model = bst)
|
||||
#'
|
||||
#' # binomial classification using "gblinear":
|
||||
#' bst <- xgboost(
|
||||
#' data = agaricus.train$data,
|
||||
#' label = agaricus.train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
|
||||
#' booster = "gblinear",
|
||||
#' eta = 0.3,
|
||||
#' nthread = 1,
|
||||
@@ -73,9 +71,11 @@
|
||||
#' # multiclass classification using "gbtree":
|
||||
#' nclass <- 3
|
||||
#' nrounds <- 10
|
||||
#' mbst <- xgboost(
|
||||
#' data = as.matrix(iris[, -5]),
|
||||
#' label = as.numeric(iris$Species) - 1,
|
||||
#' mbst <- xgb.train(
|
||||
#' data = xgb.DMatrix(
|
||||
#' as.matrix(iris[, -5]),
|
||||
#' label = as.numeric(iris$Species) - 1
|
||||
#' ),
|
||||
#' max_depth = 3,
|
||||
#' eta = 0.2,
|
||||
#' nthread = 2,
|
||||
@@ -99,9 +99,11 @@
|
||||
#' )
|
||||
#'
|
||||
#' # multiclass classification using "gblinear":
|
||||
#' mbst <- xgboost(
|
||||
#' data = scale(as.matrix(iris[, -5])),
|
||||
#' label = as.numeric(iris$Species) - 1,
|
||||
#' mbst <- xgb.train(
|
||||
#' data = xgb.DMatrix(
|
||||
#' scale(as.matrix(iris[, -5])),
|
||||
#' label = as.numeric(iris$Species) - 1
|
||||
#' ),
|
||||
#' booster = "gblinear",
|
||||
#' eta = 0.2,
|
||||
#' nthread = 1,
|
||||
|
||||
@@ -43,9 +43,8 @@
|
||||
#' nthread <- 1
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' data = agaricus.train$data,
|
||||
#' label = agaricus.train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
|
||||
#' max_depth = 2,
|
||||
#' eta = 1,
|
||||
#' nthread = nthread,
|
||||
|
||||
@@ -48,9 +48,8 @@
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' ## Change max_depth to a higher number to get a more significant result
|
||||
#' bst <- xgboost(
|
||||
#' data = agaricus.train$data,
|
||||
#' label = agaricus.train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
|
||||
#' max_depth = 6,
|
||||
#' nthread = nthread,
|
||||
#' nrounds = 50,
|
||||
|
||||
@@ -51,9 +51,8 @@
|
||||
#' nthread <- 2
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' data = agaricus.train$data,
|
||||
#' label = agaricus.train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
|
||||
#' max_depth = 3,
|
||||
#' eta = 1,
|
||||
#' nthread = nthread,
|
||||
|
||||
@@ -35,9 +35,8 @@
|
||||
#' nthread <- 2
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' data = agaricus.train$data,
|
||||
#' label = agaricus.train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
|
||||
#' max_depth = 15,
|
||||
#' eta = 1,
|
||||
#' nthread = nthread,
|
||||
|
||||
@@ -82,9 +82,8 @@
|
||||
#' data.table::setDTthreads(nthread)
|
||||
#' nrounds <- 20
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' agaricus.train$data,
|
||||
#' agaricus.train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(agaricus.train$data, agaricus.train$label),
|
||||
#' nrounds = nrounds,
|
||||
#' eta = 0.1,
|
||||
#' max_depth = 3,
|
||||
@@ -108,9 +107,8 @@
|
||||
#' set.seed(123)
|
||||
#' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
|
||||
#'
|
||||
#' mbst <- xgboost(
|
||||
#' data = x,
|
||||
#' label = as.numeric(iris$Species) - 1,
|
||||
#' mbst <- xgb.train(
|
||||
#' data = xgb.DMatrix(x, label = as.numeric(iris$Species) - 1),
|
||||
#' nrounds = nrounds,
|
||||
#' max_depth = 2,
|
||||
#' eta = 0.3,
|
||||
|
||||
@@ -68,9 +68,8 @@
|
||||
#' @examples
|
||||
#' data(agaricus.train, package = "xgboost")
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' data = agaricus.train$data,
|
||||
#' label = agaricus.train$label,
|
||||
#' bst <- xgb.train(
|
||||
#' data = xgb.DMatrix(agaricus.train$data, agaricus.train$label),
|
||||
#' max_depth = 3,
|
||||
#' eta = 1,
|
||||
#' nthread = 2,
|
||||
|
||||
@@ -182,12 +182,6 @@
|
||||
#' as R attributes, and thus do not get saved when using XGBoost's own serializers like
|
||||
#' \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
|
||||
#' @param ... other parameters to pass to \code{params}.
|
||||
#' @param label vector of response values. Should not be provided when data is
|
||||
#' a local data file name or an \code{xgb.DMatrix}.
|
||||
#' @param missing by default is set to NA, which means that NA values should be considered as 'missing'
|
||||
#' by the algorithm. Sometimes, 0 or other extreme value might be used to represent missing values.
|
||||
#' This parameter is only used when input is a dense matrix.
|
||||
#' @param weight a vector indicating the weight for each row of the input.
|
||||
#'
|
||||
#' @return
|
||||
#' An object of class \code{xgb.Booster}.
|
||||
@@ -328,12 +322,10 @@
|
||||
#' early_stopping_rounds = 3)
|
||||
#'
|
||||
#' ## An 'xgboost' interface example:
|
||||
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
|
||||
#' max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
|
||||
#' objective = "binary:logistic")
|
||||
#' bst <- xgboost(x = agaricus.train$data, y = factor(agaricus.train$label),
|
||||
#' params = list(max_depth = 2, eta = 1), nthread = nthread, nrounds = 2)
|
||||
#' pred <- predict(bst, agaricus.test$data)
|
||||
#'
|
||||
#' @rdname xgb.train
|
||||
#' @export
|
||||
xgb.train <- function(params = list(), data, nrounds, evals = list(),
|
||||
obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user