[R] Redesigned xgboost() interface skeleton (#10456)

---------

Co-authored-by: Michael Mayer <mayermichael79@gmail.com>
david-cortes 2024-07-15 12:44:58 +02:00 committed by GitHub
parent 17c64300e3
commit ab982e7873
35 changed files with 1997 additions and 242 deletions


@@ -57,7 +57,8 @@ Suggests:
     igraph (>= 1.0.1),
     float,
     titanic,
-    RhpcBLASctl
+    RhpcBLASctl,
+    survival
 Depends:
     R (>= 4.3.0)
 Imports:


@@ -13,6 +13,7 @@ S3method(predict,xgb.Booster)
 S3method(print,xgb.Booster)
 S3method(print,xgb.DMatrix)
 S3method(print,xgb.cv.synchronous)
+S3method(print,xgboost)
 S3method(setinfo,xgb.Booster)
 S3method(setinfo,xgb.DMatrix)
 S3method(variable.names,xgb.Booster)


@@ -30,6 +30,40 @@ NVL <- function(x, val) {
   return(c('rank:pairwise', 'rank:ndcg', 'rank:map'))
 }
+
+.OBJECTIVES_NON_DEFAULT_MODE <- function() {
+  return(c("reg:logistic", "binary:logitraw", "multi:softmax"))
+}
+
+.BINARY_CLASSIF_OBJECTIVES <- function() {
+  return(c("binary:logistic", "binary:hinge"))
+}
+
+.MULTICLASS_CLASSIF_OBJECTIVES <- function() {
+  return("multi:softprob")
+}
+
+.SURVIVAL_RIGHT_CENSORING_OBJECTIVES <- function() { # nolint
+  return(c("survival:cox", "survival:aft"))
+}
+
+.SURVIVAL_ALL_CENSORING_OBJECTIVES <- function() { # nolint
+  return("survival:aft")
+}
+
+.REGRESSION_OBJECTIVES <- function() {
+  return(c(
+    "reg:squarederror", "reg:squaredlogerror", "reg:logistic", "reg:pseudohubererror",
+    "reg:absoluteerror", "reg:quantileerror", "count:poisson", "reg:gamma", "reg:tweedie"
+  ))
+}
+
+.MULTI_TARGET_OBJECTIVES <- function() {
+  return(c(
+    "reg:squarederror", "reg:squaredlogerror", "reg:logistic", "reg:pseudohubererror",
+    "reg:quantileerror", "reg:gamma"
+  ))
+}
+
 #
 # Low-level functions for boosting --------------------------------------------
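These groupings feed the new interface's validation of user-supplied objectives. A rough sketch of how such a check might be used (hypothetical helper, not taken from this diff):

# hypothetical sketch: reject objectives that cannot be paired with a factor 'y'
check.classif.objective <- function(objective) {
  allowed <- c(.BINARY_CLASSIF_OBJECTIVES(), .MULTICLASS_CLASSIF_OBJECTIVES())
  if (!objective %in% allowed) {
    stop("Objective '", objective, "' cannot be used with a factor 'y'.")
  }
}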


@@ -663,9 +663,8 @@ validate.features <- function(bst, newdata) {
 #' data(agaricus.train, package = "xgboost")
 #' train <- agaricus.train
 #'
-#' bst <- xgboost(
-#'   data = train$data,
-#'   label = train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(train$data, label = train$label),
 #'   max_depth = 2,
 #'   eta = 1,
 #'   nthread = 2,
@@ -767,9 +766,8 @@ xgb.attributes <- function(object) {
 #' data.table::setDTthreads(nthread)
 #' train <- agaricus.train
 #'
-#' bst <- xgboost(
-#'   data = train$data,
-#'   label = train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(train$data, label = train$label),
 #'   max_depth = 2,
 #'   eta = 1,
 #'   nthread = nthread,
@@ -817,9 +815,8 @@ xgb.config <- function(object) {
 #' data(agaricus.train, package = "xgboost")
 #' train <- agaricus.train
 #'
-#' bst <- xgboost(
-#'   data = train$data,
-#'   label = train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(train$data, label = train$label),
 #'   max_depth = 2,
 #'   eta = 1,
 #'   nthread = 2,
@@ -1230,9 +1227,8 @@ xgb.is.same.Booster <- function(obj1, obj2) {
 #' data(agaricus.train, package = "xgboost")
 #' train <- agaricus.train
 #'
-#' bst <- xgboost(
-#'   data = train$data,
-#'   label = train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(train$data, label = train$label),
 #'   max_depth = 2,
 #'   eta = 1,
 #'   nthread = 2,


@@ -853,36 +853,6 @@ xgb.DMatrix.hasinfo <- function(object, info) {
 }
-
-# get dmatrix from data, label
-# internal helper method
-xgb.get.DMatrix <- function(data, label, missing, weight, nthread) {
-  if (inherits(data, "dgCMatrix") || is.matrix(data)) {
-    if (is.null(label)) {
-      stop("label must be provided when data is a matrix")
-    }
-    dtrain <- xgb.DMatrix(data, label = label, missing = missing, nthread = nthread)
-    if (!is.null(weight)) {
-      setinfo(dtrain, "weight", weight)
-    }
-  } else {
-    if (!is.null(label)) {
-      warning("xgboost: label will be ignored.")
-    }
-    if (is.character(data)) {
-      data <- path.expand(data)
-      dtrain <- xgb.DMatrix(data[1])
-    } else if (inherits(data, "xgb.DMatrix")) {
-      dtrain <- data
-    } else if (inherits(data, "data.frame")) {
-      stop("xgboost doesn't support data.frame as input. Convert it to matrix first.")
-    } else {
-      stop("xgboost: invalid input data")
-    }
-  }
-  return(dtrain)
-}
-
 #' Dimensions of xgb.DMatrix
 #'
 #' Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}.
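With this internal helper removed, the examples below now build the `xgb.DMatrix` explicitly and call `xgb.train()`. A minimal sketch of the replacement pattern used throughout the updated docs:

library(xgboost)
data(agaricus.train, package = "xgboost")
# build the DMatrix yourself instead of passing data/label to xgboost()
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
bst <- xgb.train(
  data = dtrain,
  params = list(max_depth = 2, eta = 1, nthread = 2, objective = "binary:logistic"),
  nrounds = 2
)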


@@ -29,8 +29,8 @@
 #' data(agaricus.test, package='xgboost')
 #' train <- agaricus.train
 #' test <- agaricus.test
-#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
+#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
 #'   eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
 #' # save the model in file 'xgb.model.dump'
 #' dump_path = file.path(tempdir(), 'model.dump')
 #' xgb.dump(bst, dump_path, with_stats = TRUE)


@@ -46,9 +46,8 @@
 #' # binomial classification using "gbtree":
 #' data(agaricus.train, package = "xgboost")
 #'
-#' bst <- xgboost(
-#'   data = agaricus.train$data,
-#'   label = agaricus.train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
 #'   max_depth = 2,
 #'   eta = 1,
 #'   nthread = 2,
@@ -59,9 +58,8 @@
 #' xgb.importance(model = bst)
 #'
 #' # binomial classification using "gblinear":
-#' bst <- xgboost(
-#'   data = agaricus.train$data,
-#'   label = agaricus.train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
 #'   booster = "gblinear",
 #'   eta = 0.3,
 #'   nthread = 1,
@@ -73,9 +71,11 @@
 #' # multiclass classification using "gbtree":
 #' nclass <- 3
 #' nrounds <- 10
-#' mbst <- xgboost(
-#'   data = as.matrix(iris[, -5]),
-#'   label = as.numeric(iris$Species) - 1,
+#' mbst <- xgb.train(
+#'   data = xgb.DMatrix(
+#'     as.matrix(iris[, -5]),
+#'     label = as.numeric(iris$Species) - 1
+#'   ),
 #'   max_depth = 3,
 #'   eta = 0.2,
 #'   nthread = 2,
@@ -99,9 +99,11 @@
 #' )
 #'
 #' # multiclass classification using "gblinear":
-#' mbst <- xgboost(
-#'   data = scale(as.matrix(iris[, -5])),
-#'   label = as.numeric(iris$Species) - 1,
+#' mbst <- xgb.train(
+#'   data = xgb.DMatrix(
+#'     scale(as.matrix(iris[, -5])),
+#'     label = as.numeric(iris$Species) - 1
+#'   ),
 #'   booster = "gblinear",
 #'   eta = 0.2,
 #'   nthread = 1,


@@ -43,9 +43,8 @@
 #' nthread <- 1
 #' data.table::setDTthreads(nthread)
 #'
-#' bst <- xgboost(
-#'   data = agaricus.train$data,
-#'   label = agaricus.train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
 #'   max_depth = 2,
 #'   eta = 1,
 #'   nthread = nthread,


@@ -48,9 +48,8 @@
 #' data.table::setDTthreads(nthread)
 #'
 #' ## Change max_depth to a higher number to get a more significant result
-#' bst <- xgboost(
-#'   data = agaricus.train$data,
-#'   label = agaricus.train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
 #'   max_depth = 6,
 #'   nthread = nthread,
 #'   nrounds = 50,


@@ -51,9 +51,8 @@
 #' nthread <- 2
 #' data.table::setDTthreads(nthread)
 #'
-#' bst <- xgboost(
-#'   data = agaricus.train$data,
-#'   label = agaricus.train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
 #'   max_depth = 3,
 #'   eta = 1,
 #'   nthread = nthread,


@@ -35,9 +35,8 @@
 #' nthread <- 2
 #' data.table::setDTthreads(nthread)
 #'
-#' bst <- xgboost(
-#'   data = agaricus.train$data,
-#'   label = agaricus.train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
 #'   max_depth = 15,
 #'   eta = 1,
 #'   nthread = nthread,


@@ -82,9 +82,8 @@
 #' data.table::setDTthreads(nthread)
 #' nrounds <- 20
 #'
-#' bst <- xgboost(
-#'   agaricus.train$data,
-#'   agaricus.train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(agaricus.train$data, agaricus.train$label),
 #'   nrounds = nrounds,
 #'   eta = 0.1,
 #'   max_depth = 3,
@@ -108,9 +107,8 @@
 #' set.seed(123)
 #' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
 #'
-#' mbst <- xgboost(
-#'   data = x,
-#'   label = as.numeric(iris$Species) - 1,
+#' mbst <- xgb.train(
+#'   data = xgb.DMatrix(x, label = as.numeric(iris$Species) - 1),
 #'   nrounds = nrounds,
 #'   max_depth = 2,
 #'   eta = 0.3,


@@ -68,9 +68,8 @@
 #' @examples
 #' data(agaricus.train, package = "xgboost")
 #'
-#' bst <- xgboost(
-#'   data = agaricus.train$data,
-#'   label = agaricus.train$label,
+#' bst <- xgb.train(
+#'   data = xgb.DMatrix(agaricus.train$data, agaricus.train$label),
 #'   max_depth = 3,
 #'   eta = 1,
 #'   nthread = 2,


@@ -182,12 +182,6 @@
 #' as R attributes, and thus do not get saved when using XGBoost's own serializaters like
 #' \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
 #' @param ... other parameters to pass to \code{params}.
-#' @param label vector of response values. Should not be provided when data is
-#'        a local data file name or an \code{xgb.DMatrix}.
-#' @param missing by default is set to NA, which means that NA values should be considered as 'missing'
-#'        by the algorithm. Sometimes, 0 or other extreme value might be used to represent missing values.
-#'        This parameter is only used when input is a dense matrix.
-#' @param weight a vector indicating the weight for each row of the input.
 #'
 #' @return
 #' An object of class \code{xgb.Booster}.
@@ -328,12 +322,10 @@
 #'   early_stopping_rounds = 3)
 #'
 #' ## An 'xgboost' interface example:
-#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
-#'   max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
-#'   objective = "binary:logistic")
+#' bst <- xgboost(x = agaricus.train$data, y = factor(agaricus.train$label),
+#'   params = list(max_depth = 2, eta = 1), nthread = nthread, nrounds = 2)
 #' pred <- predict(bst, agaricus.test$data)
 #'
-#' @rdname xgb.train
 #' @export
 xgb.train <- function(params = list(), data, nrounds, evals = list(),
                       obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,

(File diff suppressed because it is too large.)


@@ -16,29 +16,28 @@ class(train$data)
 # note: we are putting in sparse matrix here, xgboost naturally handles sparse input
 # use sparse matrix when your feature is sparse(e.g. when you are using one-hot encoding vector)
 print("Training xgboost with sparseMatrix")
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1, nrounds = 2,
-               nthread = 2, objective = "binary:logistic")
+bst <- xgboost(x = train$data, y = factor(train$label, c(0, 1)),
+               params = list(max_depth = 2, eta = 1),
+               nrounds = 2, nthread = 2)
 
 # alternatively, you can put in dense matrix, i.e. basic R-matrix
 print("Training xgboost with Matrix")
-bst <- xgboost(data = as.matrix(train$data), label = train$label, max_depth = 2, eta = 1, nrounds = 2,
-               nthread = 2, objective = "binary:logistic")
+bst <- xgboost(x = as.matrix(train$data), y = factor(train$label, c(0, 1)),
+               params = list(max_depth = 2, eta = 1),
+               nrounds = 2, nthread = 2)
 
 # you can also put in xgb.DMatrix object, which stores label, data and other meta datas needed for advanced features
 print("Training xgboost with xgb.DMatrix")
 dtrain <- xgb.DMatrix(data = train$data, label = train$label)
-bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, nthread = 2,
-               objective = "binary:logistic")
+params <- list(max_depth = 2, eta = 1, nthread = 2, objective = "binary:logistic")
+bst <- xgb.train(data = dtrain, params = params, nrounds = 2)
 
 # Verbose = 0,1,2
 print("Train xgboost with verbose 0, no message")
-bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
-               nthread = 2, objective = "binary:logistic", verbose = 0)
+bst <- xgb.train(data = dtrain, params = params, nrounds = 2, verbose = 0)
 print("Train xgboost with verbose 1, print evaluation metric")
-bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
-               nthread = 2, objective = "binary:logistic", verbose = 1)
+bst <- xgb.train(data = dtrain, params = params, nrounds = 2, verbose = 1)
 print("Train xgboost with verbose 2, also print information about tree")
-bst <- xgboost(data = dtrain, max_depth = 2, eta = 1, nrounds = 2,
-               nthread = 2, objective = "binary:logistic", verbose = 2)
+bst <- xgb.train(data = dtrain, params = params, nrounds = 2, verbose = 2)
 
 # you can also specify data as file path to a LIBSVM format input
 # since we do not have this file with us, the following line is just for illustration


@@ -21,9 +21,8 @@ Print information about \code{xgb.Booster}.
 data(agaricus.train, package = "xgboost")
 train <- agaricus.train
-bst <- xgboost(
-  data = train$data,
-  label = train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(train$data, label = train$label),
   max_depth = 2,
   eta = 1,
   nthread = 2,


@@ -64,9 +64,8 @@ example of these behaviors).
 data(agaricus.train, package = "xgboost")
 train <- agaricus.train
-bst <- xgboost(
-  data = train$data,
-  label = train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(train$data, label = train$label),
   max_depth = 2,
   eta = 1,
   nthread = 2,


@@ -35,9 +35,8 @@ nthread <- 1
 data.table::setDTthreads(nthread)
 train <- agaricus.train
-bst <- xgboost(
-  data = train$data,
-  label = train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(train$data, label = train$label),
   max_depth = 2,
   eta = 1,
   nthread = nthread,


@@ -49,8 +49,8 @@ data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 train <- agaricus.train
 test <- agaricus.test
-bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
+bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
   eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
 # save the model in file 'xgb.model.dump'
 dump_path = file.path(tempdir(), 'model.dump')
 xgb.dump(bst, dump_path, with_stats = TRUE)


@@ -70,9 +70,8 @@ be on the same scale (which is also recommended when using L1 or L2 regularizati
 # binomial classification using "gbtree":
 data(agaricus.train, package = "xgboost")
-bst <- xgboost(
-  data = agaricus.train$data,
-  label = agaricus.train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
   max_depth = 2,
   eta = 1,
   nthread = 2,
@@ -83,9 +82,8 @@ bst <- xgboost(
 xgb.importance(model = bst)
 
 # binomial classification using "gblinear":
-bst <- xgboost(
-  data = agaricus.train$data,
-  label = agaricus.train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
   booster = "gblinear",
   eta = 0.3,
   nthread = 1,
@@ -97,9 +95,11 @@ xgb.importance(model = bst)
 # multiclass classification using "gbtree":
 nclass <- 3
 nrounds <- 10
-mbst <- xgboost(
-  data = as.matrix(iris[, -5]),
-  label = as.numeric(iris$Species) - 1,
+mbst <- xgb.train(
+  data = xgb.DMatrix(
+    as.matrix(iris[, -5]),
+    label = as.numeric(iris$Species) - 1
+  ),
   max_depth = 3,
   eta = 0.2,
   nthread = 2,
@@ -123,9 +123,11 @@ xgb.importance(
 )
 
 # multiclass classification using "gblinear":
-mbst <- xgboost(
-  data = scale(as.matrix(iris[, -5])),
-  label = as.numeric(iris$Species) - 1,
+mbst <- xgb.train(
+  data = xgb.DMatrix(
+    scale(as.matrix(iris[, -5])),
+    label = as.numeric(iris$Species) - 1
+  ),
   booster = "gblinear",
   eta = 0.2,
   nthread = 1,


@@ -63,9 +63,8 @@ data(agaricus.train, package = "xgboost")
 nthread <- 1
 data.table::setDTthreads(nthread)
-bst <- xgboost(
-  data = agaricus.train$data,
-  label = agaricus.train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
   max_depth = 2,
   eta = 1,
   nthread = nthread,


@@ -33,9 +33,8 @@ will reset its number of rounds indicator to zero.
 data(agaricus.train, package = "xgboost")
 train <- agaricus.train
-bst <- xgboost(
-  data = train$data,
-  label = train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(train$data, label = train$label),
   max_depth = 2,
   eta = 1,
   nthread = 2,


@@ -73,9 +73,8 @@ nthread <- 2
 data.table::setDTthreads(nthread)
 ## Change max_depth to a higher number to get a more significant result
-bst <- xgboost(
-  data = agaricus.train$data,
-  label = agaricus.train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
   max_depth = 6,
   nthread = nthread,
   nrounds = 50,


@@ -88,9 +88,8 @@ data(agaricus.train)
 nthread <- 2
 data.table::setDTthreads(nthread)
-bst <- xgboost(
-  data = agaricus.train$data,
-  label = agaricus.train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
   max_depth = 3,
   eta = 1,
   nthread = nthread,


@@ -67,9 +67,8 @@ data(agaricus.train, package = "xgboost")
 nthread <- 2
 data.table::setDTthreads(nthread)
-bst <- xgboost(
-  data = agaricus.train$data,
-  label = agaricus.train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
   max_depth = 15,
   eta = 1,
   nthread = nthread,


@@ -135,9 +135,8 @@ nthread <- 1
 data.table::setDTthreads(nthread)
 nrounds <- 20
-bst <- xgboost(
-  agaricus.train$data,
-  agaricus.train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(agaricus.train$data, agaricus.train$label),
   nrounds = nrounds,
   eta = 0.1,
   max_depth = 3,
@@ -161,9 +160,8 @@ x <- as.matrix(iris[, -5])
 set.seed(123)
 is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
-mbst <- xgboost(
-  data = x,
-  label = as.numeric(iris$Species) - 1,
+mbst <- xgb.train(
+  data = xgb.DMatrix(x, label = as.numeric(iris$Species) - 1),
   nrounds = nrounds,
   max_depth = 2,
   eta = 0.3,


@@ -96,9 +96,8 @@ This function uses \href{https://www.graphviz.org/}{GraphViz} as DiagrammeR back
 \examples{
 data(agaricus.train, package = "xgboost")
-bst <- xgboost(
-  data = agaricus.train$data,
-  label = agaricus.train$label,
+bst <- xgb.train(
+  data = xgb.DMatrix(agaricus.train$data, agaricus.train$label),
   max_depth = 3,
   eta = 1,
   nthread = 2,


@@ -1,8 +1,7 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/xgb.train.R, R/xgboost.R
+% Please edit documentation in R/xgb.train.R
 \name{xgb.train}
 \alias{xgb.train}
-\alias{xgboost}
 \title{eXtreme Gradient Boosting Training}
 \usage{
 xgb.train(
@@ -22,24 +21,6 @@ xgb.train(
   callbacks = list(),
   ...
 )
-
-xgboost(
-  data = NULL,
-  label = NULL,
-  missing = NA,
-  weight = NULL,
-  params = list(),
-  nrounds,
-  verbose = 1,
-  print_every_n = 1L,
-  early_stopping_rounds = NULL,
-  maximize = NULL,
-  save_period = NULL,
-  save_name = "xgboost.model",
-  xgb_model = NULL,
-  callbacks = list(),
-  ...
-)
 }
 \arguments{
 \item{params}{the list of parameters. The complete list of parameters is
@@ -240,15 +221,6 @@ to customize the training process.
 }\if{html}{\out{</div>}}}
 
 \item{...}{other parameters to pass to \code{params}.}
 
-\item{label}{vector of response values. Should not be provided when data is
-a local data file name or an \code{xgb.DMatrix}.}
-
-\item{missing}{by default is set to NA, which means that NA values should be considered as 'missing'
-by the algorithm. Sometimes, 0 or other extreme value might be used to represent missing values.
-This parameter is only used when input is a dense matrix.}
-
-\item{weight}{a vector indicating the weight for each row of the input.}
 }
 \value{
 An object of class \code{xgb.Booster}.
@@ -383,9 +355,8 @@ bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
   early_stopping_rounds = 3)
 
 ## An 'xgboost' interface example:
-bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
-  max_depth = 2, eta = 1, nthread = nthread, nrounds = 2,
-  objective = "binary:logistic")
+bst <- xgboost(x = agaricus.train$data, y = factor(agaricus.train$label),
+  params = list(max_depth = 2, eta = 1), nthread = nthread, nrounds = 2)
 pred <- predict(bst, agaricus.test$data)
 }

R-package/man/xgboost.Rd (new file, 213 lines)

@@ -0,0 +1,213 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgboost.R
\name{xgboost}
\alias{xgboost}
\title{Fit XGBoost Model}
\usage{
xgboost(
x,
y,
objective = NULL,
nrounds = 100L,
weights = NULL,
verbosity = 0L,
nthreads = parallel::detectCores(),
seed = 0L,
monotone_constraints = NULL,
interaction_constraints = NULL,
feature_weights = NULL,
base_margin = NULL,
...
)
}
\arguments{
\item{x}{The features / covariates. Can be passed as:\itemize{
\item A numeric or integer `matrix`.
\item A `data.frame`, in which all columns are one of the following types:\itemize{
\item `numeric`
\item `integer`
\item `logical`
\item `factor`
}
Columns of `factor` type will be assumed to be categorical, while other column types will
be assumed to be numeric.
\item A sparse matrix from the `Matrix` package, either as `dgCMatrix` or `dgRMatrix` class.
}
Note that categorical features are only supported for `data.frame` inputs, and are automatically
determined based on their types. See \link{xgb.train} with \link{xgb.DMatrix} for more flexible
variants that would allow something like categorical features on sparse matrices.}
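A quick illustrative sketch of the two main input types (using `iris`, which is not part of this description): a numeric matrix is all-numeric, while a `data.frame` with a `factor` column gets that column treated as categorical.

library(xgboost)
# all-numeric matrix input
m_mat <- xgboost(as.matrix(iris[, 1:4]), iris$Species, nthreads = 1, nrounds = 3)
# data.frame input: 'Species' is a factor, hence treated as a categorical feature
m_df <- xgboost(iris[, c("Sepal.Width", "Species")], iris$Sepal.Length, nthreads = 1, nrounds = 3)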
\item{y}{The response variable. Allowed values are:\itemize{
\item A numeric or integer vector (for regression tasks).
\item A factor or character vector (for binary and multi-class classification tasks).
\item A logical (boolean) vector (for binary classification tasks).
\item A numeric or integer matrix or `data.frame` with numeric/integer columns
(for multi-task regression tasks).
\item A `Surv` object from the `survival` package (for survival tasks).
}
If `objective` is `NULL`, the right task will be determined automatically based on
the class of `y`.
If `objective` is not `NULL`, it must match with the type of `y` - e.g. `factor` types of `y`
can only be used with classification objectives and vice-versa.
For binary classification, the last factor level of `y` will be used as the "positive"
class - that is, the numbers from `predict` will reflect the probabilities of belonging to this
class instead of to the first factor level. If `y` is a `logical` vector, then `TRUE` will be
set as the last level.}
\item{objective}{Optimization objective to minimize based on the supplied data, to be passed
by name as a string / character (e.g. `reg:absoluteerror`). See the
\href{https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters}{
Learning Task Parameters} page for more detailed information on allowed values.
If `NULL` (the default), will be automatically determined from `y` according to the following
logic:\itemize{
\item If `y` is a factor with 2 levels, will use `binary:logistic`.
\item If `y` is a factor with more than 2 levels, will use `multi:softprob` (number of classes
will be determined automatically, should not be passed under `params`).
\item If `y` is a `Surv` object from the `survival` package, will use `survival:aft` (note that
the only types supported are left / right / interval censored).
\item Otherwise, will use `reg:squarederror`.
}
If `objective` is not `NULL`, it must match with the type of `y` - e.g. `factor` types of `y`
can only be used with classification objectives and vice-versa.
Note that not all possible `objective` values supported by the core XGBoost library are allowed
here - for example, objectives which are a variation of another but with a different default
prediction type (e.g. `multi:softmax` vs. `multi:softprob`) are not allowed, and neither are
ranking objectives, nor custom objectives at the moment.}
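A short sketch of how the class of `y` picks the task, and how an explicit `objective` must agree with it (illustrative only, again using `iris`):

x <- iris[, c("Petal.Length", "Petal.Width")]
m_reg <- xgboost(x, iris$Sepal.Length, nthreads = 1, nrounds = 3)         # numeric y -> reg:squarederror
m_bin <- xgboost(x, iris$Species == "setosa", nthreads = 1, nrounds = 3)  # logical y -> binary:logistic
m_cls <- xgboost(x, iris$Species, nthreads = 1, nrounds = 3)              # factor y -> multi:softprob
# an explicit objective is allowed as long as it matches the type of y
m_hinge <- xgboost(x, iris$Species == "setosa", objective = "binary:hinge",
                   nthreads = 1, nrounds = 3)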
\item{nrounds}{Number of boosting iterations / rounds.
Note that the number of default boosting rounds here is not automatically tuned, and different
problems will have vastly different optimal numbers of boosting rounds.}
\item{weights}{Sample weights for each row in `x` and `y`. If `NULL` (the default), each row
will have the same weight.
If not `NULL`, should be passed as a numeric vector with length matching to the number of
rows in `x`.}
\item{verbosity}{Verbosity of printing messages. Valid values of 0 (silent), 1 (warning),
2 (info), and 3 (debug).}
\item{nthreads}{Number of parallel threads to use. If passing zero, will use all CPU threads.}
\item{seed}{Seed to use for random number generation. If passing `NULL`, will draw a random
number using R's PRNG system to use as seed.}
\item{monotone_constraints}{Optional monotonicity constraints for features.
Can be passed either as a named list (when `x` has column names), or as a vector. If passed
as a vector and `x` has column names, will try to match the elements by name.
A value of `+1` for a given feature makes the model predictions / scores constrained to be
a monotonically increasing function of that feature (that is, as the value of the feature
increases, the model prediction cannot decrease), while a value of `-1` makes it a monotonically
decreasing function. A value of zero imposes no constraint.
The input for `monotone_constraints` can be a subset of the columns of `x` if named, in which
case the columns that are not referred to in `monotone_constraints` will be assumed to have
a value of zero (no constraint imposed on the model for those features).
See the tutorial \href{https://xgboost.readthedocs.io/en/stable/tutorials/monotonic.html}{
Monotonic Constraints} for a more detailed explanation.}
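For instance, a sketch (reusing the `survival::cancer` data that the tests below also use) constraining predictions to be non-increasing in `age` while leaving the other columns unconstrained:

data(cancer, package = "survival")
x <- cancer[, c("age", "ph.ecog", "wt.loss")]
m <- xgboost(x, cancer$time, monotone_constraints = list(age = -1),
             nthreads = 1, nrounds = 5)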
\item{interaction_constraints}{Constraints for interaction representing permitted interactions.
The constraints must be specified in the form of a list of vectors referencing columns in the
data, e.g. `list(c(1, 2), c(3, 4, 5))` (with these numbers being column indices, numeration
starting at 1 - i.e. the first sublist references the first and second columns) or
`list(c("Sepal.Length", "Sepal.Width"), c("Petal.Length", "Petal.Width"))` (references
columns by names), where each vector is a group of indices of features that are allowed to
interact with each other.
See the tutorial
\href{https://xgboost.readthedocs.io/en/stable/tutorials/feature_interaction_constraint.html}{
Feature Interaction Constraints} for more information.}
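For instance, a sketch (with `iris`) letting the sepal columns interact only with each other and the petal columns only with each other:

m <- xgboost(
  iris[, 1:4], iris$Species,
  interaction_constraints = list(
    c("Sepal.Length", "Sepal.Width"),
    c("Petal.Length", "Petal.Width")
  ),
  nthreads = 1, nrounds = 5
)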
\item{feature_weights}{Feature weights for column sampling.
Can be passed either as a vector with length matching to columns of `x`, or as a named
list (only if `x` has column names) with names matching to columns of 'x'. If it is a
named vector, will try to match the entries to column names of `x` by name.
If `NULL` (the default), all columns will have the same weight.}
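For instance, a sketch down-weighting one column for column sampling (`colsample_bynode` here is an ordinary booster parameter assumed to be passed through `...`):

m <- xgboost(
  iris[, 1:4], iris$Species,
  feature_weights = c(Sepal.Length = 1, Sepal.Width = 1, Petal.Length = 1, Petal.Width = 0.2),
  colsample_bynode = 0.5,
  nthreads = 1, nrounds = 5
)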
\item{base_margin}{Base margin used for boosting from existing model.
If passing it, will start the gradient boosting procedure from the scores that are provided
here - for example, one can pass the raw scores from a previous model, or some per-observation
offset, or similar.
Should be either a numeric vector or numeric matrix (for multi-class and multi-target objectives)
with the same number of rows as `x` and number of columns corresponding to number of optimization
targets, and should be in the untransformed scale (for example, for objective `binary:logistic`,
it should have log-odds, not probabilities; and for objective `multi:softprob`, should have
number of columns matching to number of classes in the data).
Note that, if it contains more than one column, then columns will not be matched by name to
the corresponding `y` - `base_margin` should have the same column order that the model will use
(for example, for objective `multi:softprob`, columns of `base_margin` will be matched against
`levels(y)` by their position, regardless of what `colnames(base_margin)` returns).
If `NULL`, will start from zero, but note that for most objectives, an intercept is usually
added (controllable through parameter `base_score` instead) when `base_margin` is not passed.}
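For instance, a sketch starting the boosting from a constant per-observation offset on the raw (untransformed) scale for a squared-error regression:

x <- as.matrix(mtcars[, -1])
offset <- rep(mean(mtcars$mpg), nrow(x))   # raw-scale offset, one value per row
m <- xgboost(x, mtcars$mpg, base_margin = offset, nthreads = 1, nrounds = 3)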
\item{...}{Other training parameters. See the online documentation
\href{https://xgboost.readthedocs.io/en/stable/parameter.html}{XGBoost Parameters} for
details about possible values and what they do.
Note that not all possible values from the core XGBoost library are allowed as `params` for
'xgboost()' - in particular, values which require an already-fitted booster object (such as
`process_type`) are not accepted here.}
}
\value{
A model object, inheriting from both `xgboost` and `xgb.Booster`. Compared to the regular
`xgb.Booster` model class produced by \link{xgb.train}, this `xgboost` class will have an
additional attribute `metadata` containing information which is used for formatting prediction
outputs, such as class names for classification problems.
}
\description{
Fits an XGBoost model (boosted decision tree ensemble) to given x/y data.
See the tutorial \href{https://xgboost.readthedocs.io/en/stable/tutorials/model.html}{
Introduction to Boosted Trees} for a longer explanation of what XGBoost does.
This function is intended to provide a more user-friendly interface for XGBoost that follows
R's conventions for model fitting and predictions, but which doesn't expose all of the
possible functionalities of the core XGBoost library.
See \link{xgb.train} for a more flexible low-level alternative which is similar across different
language bindings of XGBoost and which exposes the full library's functionalities.
}
\details{
For package authors using `xgboost` as a dependency, it is highly recommended to use
\link{xgb.train} in package code instead of `xgboost()`, since it has a more stable interface
and performs fewer data conversions and copies along the way.
}
\examples{
library(xgboost)
data(mtcars)
# Fit a small regression model on the mtcars data
model_regression <- xgboost(mtcars[, -1], mtcars$mpg, nthreads = 1, nrounds = 3)
predict(model_regression, mtcars, validate_features = TRUE)
# Task objective is determined automatically according to the type of 'y'
data(iris)
model_classif <- xgboost(iris[, -5], iris$Species, nthreads = 1, nrounds = 5)
predict(model_classif, iris, validate_features = TRUE)
}
\references{
\itemize{
\item Chen, Tianqi, and Carlos Guestrin. "Xgboost: A scalable tree boosting system."
Proceedings of the 22nd acm sigkdd international conference on knowledge discovery and
data mining. 2016.
\item \url{https://xgboost.readthedocs.io/en/stable/}
}
}


@@ -0,0 +1,623 @@
library(survival)
library(data.table)
test_that("Auto determine objective", {
y_num <- seq(1, 10)
res_num <- process.y.margin.and.objective(y_num, NULL, NULL, NULL)
expect_equal(res_num$params$objective, "reg:squarederror")
y_bin <- factor(c('a', 'b', 'a', 'b'), c('a', 'b'))
res_bin <- process.y.margin.and.objective(y_bin, NULL, NULL, NULL)
expect_equal(res_bin$params$objective, "binary:logistic")
y_multi <- factor(c('a', 'b', 'a', 'b', 'c'), c('a', 'b', 'c'))
res_multi <- process.y.margin.and.objective(y_multi, NULL, NULL, NULL)
expect_equal(res_multi$params$objective, "multi:softprob")
y_surv <- Surv(1:10, rep(c(0, 1), 5), type = "right")
res_surv <- process.y.margin.and.objective(y_surv, NULL, NULL, NULL)
expect_equal(res_surv$params$objective, "survival:aft")
y_multicol <- matrix(seq(1, 20), nrow = 5)
res_multicol <- process.y.margin.and.objective(y_multicol, NULL, NULL, NULL)
expect_equal(res_multicol$params$objective, "reg:squarederror")
})
test_that("Process vectors", {
y <- seq(1, 10)
for (y_inp in list(as.integer(y), as.numeric(y))) {
res <- process.y.margin.and.objective(y_inp, NULL, "reg:pseudohubererror", NULL)
expect_equal(
res$dmatrix_args$label,
y
)
expect_equal(
res$params$objective,
"reg:pseudohubererror"
)
}
})
test_that("Process factors", {
y_bin <- factor(c('a', 'b', 'a', 'b'), c('a', 'b'))
expect_error({
process.y.margin.and.objective(y_bin, NULL, "multi:softprob", NULL)
})
for (bin_obj in c("binary:logistic", "binary:hinge")) {
for (y_inp in list(y_bin, as.ordered(y_bin))) {
res_bin <- process.y.margin.and.objective(y_inp, NULL, bin_obj, NULL)
expect_equal(
res_bin$dmatrix_args$label,
c(0, 1, 0, 1)
)
expect_equal(
res_bin$metadata$y_levels,
c('a', 'b')
)
expect_equal(
res_bin$params$objective,
bin_obj
)
}
}
y_bin2 <- factor(c(1, 0, 1, 0), c(1, 0))
res_bin <- process.y.margin.and.objective(y_bin2, NULL, "binary:logistic", NULL)
expect_equal(
res_bin$dmatrix_args$label,
c(0, 1, 0, 1)
)
expect_equal(
res_bin$metadata$y_levels,
c("1", "0")
)
y_bin3 <- c(TRUE, FALSE, TRUE)
res_bin <- process.y.margin.and.objective(y_bin3, NULL, "binary:logistic", NULL)
expect_equal(
res_bin$dmatrix_args$label,
c(1, 0, 1)
)
expect_equal(
res_bin$metadata$y_levels,
c("FALSE", "TRUE")
)
y_multi <- factor(c('a', 'b', 'c', 'd', 'a', 'b'), c('a', 'b', 'c', 'd'))
expect_error({
process.y.margin.and.objective(y_multi, NULL, "binary:logistic", NULL)
})
expect_error({
process.y.margin.and.objective(y_multi, NULL, "binary:logistic", NULL)
})
res_multi <- process.y.margin.and.objective(y_multi, NULL, "multi:softprob", NULL)
expect_equal(
res_multi$dmatrix_args$label,
c(0, 1, 2, 3, 0, 1)
)
expect_equal(
res_multi$metadata$y_levels,
c('a', 'b', 'c', 'd')
)
expect_equal(
res_multi$params$num_class,
4
)
expect_equal(
res_multi$params$objective,
"multi:softprob"
)
})
test_that("Process survival objects", {
data(cancer, package = "survival")
y_right <- Surv(cancer$time, cancer$status - 1, type = "right")
res_cox <- process.y.margin.and.objective(y_right, NULL, "survival:cox", NULL)
expect_equal(
res_cox$dmatrix_args$label,
ifelse(cancer$status == 2, cancer$time, -cancer$time)
)
expect_equal(
res_cox$params$objective,
"survival:cox"
)
res_aft <- process.y.margin.and.objective(y_right, NULL, "survival:aft", NULL)
expect_equal(
res_aft$dmatrix_args$label_lower_bound,
cancer$time
)
expect_equal(
res_aft$dmatrix_args$label_upper_bound,
ifelse(cancer$status == 2, cancer$time, Inf)
)
expect_equal(
res_aft$params$objective,
"survival:aft"
)
y_left <- Surv(seq(1, 4), c(1, 0, 1, 0), type = "left")
expect_error({
process.y.margin.and.objective(y_left, NULL, "survival:cox", NULL)
})
res_aft <- process.y.margin.and.objective(y_left, NULL, "survival:aft", NULL)
expect_equal(
res_aft$dmatrix_args$label_lower_bound,
c(1, 0, 3, 0)
)
expect_equal(
res_aft$dmatrix_args$label_upper_bound,
seq(1, 4)
)
expect_equal(
res_aft$params$objective,
"survival:aft"
)
y_interval <- Surv(
time = c(1, 5, 2, 10, 3),
time2 = c(2, 5, 2.5, 10, 3),
event = c(3, 1, 3, 0, 2),
type = "interval"
)
expect_error({
process.y.margin.and.objective(y_interval, NULL, "survival:cox", NULL)
})
res_aft <- process.y.margin.and.objective(y_interval, NULL, "survival:aft", NULL)
expect_equal(
res_aft$dmatrix_args$label_lower_bound,
c(1, 5, 2, 10, 0)
)
expect_equal(
res_aft$dmatrix_args$label_upper_bound,
c(2, 5, 2.5, Inf, 3)
)
expect_equal(
res_aft$params$objective,
"survival:aft"
)
y_interval_neg <- Surv(
time = c(1, -5, 2, 10, 3),
time2 = c(2, -5, 2.5, 10, 3),
event = c(3, 1, 3, 0, 2),
type = "interval"
)
expect_error({
process.y.margin.and.objective(y_interval_neg, NULL, "survival:aft", NULL)
})
})
test_that("Process multi-target", {
data(mtcars)
y_multi <- data.frame(
y1 = mtcars$mpg,
y2 = mtcars$mpg ^ 2
)
for (y_inp in list(y_multi, as.matrix(y_multi), data.table::as.data.table(y_multi))) {
res_multi <- process.y.margin.and.objective(y_inp, NULL, "reg:pseudohubererror", NULL)
expect_equal(
res_multi$dmatrix_args$label,
as.matrix(y_multi)
)
expect_equal(
res_multi$metadata$y_names,
c("y1", "y2")
)
expect_equal(
res_multi$params$objective,
"reg:pseudohubererror"
)
}
expect_error({
process.y.margin.and.objective(y_multi, NULL, "count:poisson", NULL)
})
y_bad <- data.frame(
c1 = seq(1, 3),
c2 = rep(as.Date("2024-01-01"), 3)
)
expect_error({
process.y.margin.and.objective(y_bad, NULL, "reg:squarederror", NULL)
})
y_bad <- data.frame(
c1 = seq(1, 3),
c2 = factor(c('a', 'b', 'a'), c('a', 'b'))
)
expect_error({
process.y.margin.and.objective(y_bad, NULL, "reg:squarederror", NULL)
})
y_bad <- seq(1, 20)
dim(y_bad) <- c(5, 2, 2)
expect_error({
process.y.margin.and.objective(y_bad, NULL, "reg:squarederror", NULL)
})
})
test_that("Process base_margin", {
y <- seq(101, 110)
bm_good <- seq(1, 10)
for (bm in list(bm_good, as.matrix(bm_good), as.data.frame(as.matrix(bm_good)))) {
res <- process.y.margin.and.objective(y, bm, "reg:squarederror", NULL)
expect_equal(
res$dmatrix_args$base_margin,
seq(1, 10)
)
}
expect_error({
process.y.margin.and.objective(y, 5, "reg:squarederror", NULL)
})
expect_error({
process.y.margin.and.objective(y, seq(1, 5), "reg:squarederror", NULL)
})
expect_error({
process.y.margin.and.objective(y, matrix(seq(1, 20), ncol = 2), "reg:squarederror", NULL)
})
expect_error({
process.y.margin.and.objective(
y,
as.data.frame(matrix(seq(1, 20), ncol = 2)),
"reg:squarederror",
NULL
)
})
y <- factor(c('a', 'b', 'c', 'a'))
bm_good <- matrix(seq(1, 12), ncol = 3)
for (bm in list(bm_good, as.data.frame(bm_good))) {
res <- process.y.margin.and.objective(y, bm, "multi:softprob", NULL)
expect_equal(
res$dmatrix_args$base_margin |> unname(),
matrix(seq(1, 12), ncol = 3)
)
}
expect_error({
process.y.margin.and.objective(y, as.numeric(bm_good), "multi:softprob", NULL)
})
expect_error({
process.y.margin.and.objective(y, 5, "multi:softprob", NULL)
})
expect_error({
process.y.margin.and.objective(y, bm_good[, 1], "multi:softprob", NULL)
})
expect_error({
process.y.margin.and.objective(y, bm_good[, c(1, 2)], "multi:softprob", NULL)
})
expect_error({
process.y.margin.and.objective(y, bm_good[c(1, 2), ], "multi:softprob", NULL)
})
y <- seq(101, 110)
bm_good <- matrix(seq(1, 30), ncol = 3)
params <- list(quantile_alpha = c(0.1, 0.5, 0.9))
for (bm in list(bm_good, as.data.frame(bm_good))) {
res <- process.y.margin.and.objective(y, bm, "reg:quantileerror", params)
expect_equal(
res$dmatrix_args$base_margin |> unname(),
matrix(seq(1, 30), ncol = 3)
)
}
expect_error({
process.y.margin.and.objective(y, as.numeric(bm_good), "reg:quantileerror", params)
})
expect_error({
process.y.margin.and.objective(y, 5, "reg:quantileerror", params)
})
expect_error({
process.y.margin.and.objective(y, bm_good[, 1], "reg:quantileerror", params)
})
expect_error({
process.y.margin.and.objective(y, bm_good[, c(1, 2)], "reg:quantileerror", params)
})
expect_error({
process.y.margin.and.objective(y, bm_good[c(1, 2, 3), ], "reg:quantileerror", params)
})
y <- matrix(seq(101, 130), ncol = 3)
for (bm in list(bm_good, as.data.frame(bm_good))) {
res <- process.y.margin.and.objective(y, bm, "reg:squarederror", params)
expect_equal(
res$dmatrix_args$base_margin |> unname(),
matrix(seq(1, 30), ncol = 3)
)
}
expect_error({
process.y.margin.and.objective(y, as.numeric(bm_good), "reg:squarederror", params)
})
expect_error({
process.y.margin.and.objective(y, 5, "reg:squarederror", params)
})
expect_error({
process.y.margin.and.objective(y, bm_good[, 1], "reg:squarederror", params)
})
expect_error({
process.y.margin.and.objective(y, bm_good[, c(1, 2)], "reg:squarederror", params)
})
expect_error({
process.y.margin.and.objective(y, bm_good[c(1, 2, 3), ], "reg:squarederror", params)
})
})
test_that("Process monotone constraints", {
data(iris)
mc_list <- list(Sepal.Width = 1)
res <- process.x.and.col.args(
iris,
monotone_constraints = mc_list,
interaction_constraints = NULL,
feature_weights = NULL,
lst_args = list(),
use_qdm = FALSE
)
expect_equal(
res$params$monotone_constraints,
c(0, 1, 0, 0, 0)
)
mc_list2 <- list(Sepal.Width = 1, Petal.Width = -1)
res <- process.x.and.col.args(
iris,
monotone_constraints = mc_list2,
interaction_constraints = NULL,
feature_weights = NULL,
lst_args = list(),
use_qdm = FALSE
)
expect_equal(
res$params$monotone_constraints,
c(0, 1, 0, -1, 0)
)
mc_vec <- c(0, 1, -1, 0, 0)
res <- process.x.and.col.args(
iris,
monotone_constraints = mc_vec,
interaction_constraints = NULL,
feature_weights = NULL,
lst_args = list(),
use_qdm = FALSE
)
expect_equal(
res$params$monotone_constraints,
c(0, 1, -1, 0, 0)
)
mc_named_vec <- c(1, 1)
names(mc_named_vec) <- names(iris)[1:2]
res <- process.x.and.col.args(
iris,
monotone_constraints = mc_named_vec,
interaction_constraints = NULL,
feature_weights = NULL,
lst_args = list(),
use_qdm = FALSE
)
expect_equal(
res$params$monotone_constraints,
c(1, 1, 0, 0, 0)
)
mc_named_all <- c(0, -1, 1, 0, -1)
names(mc_named_all) <- rev(names(iris))
res <- process.x.and.col.args(
iris,
monotone_constraints = mc_named_all,
interaction_constraints = NULL,
feature_weights = NULL,
lst_args = list(),
use_qdm = FALSE
)
expect_equal(
res$params$monotone_constraints,
rev(mc_named_all) |> unname()
)
expect_error({
process.x.and.col.args(
iris,
monotone_constraints = list(
Sepal.Width = 1,
Petal.Width = -1,
Sepal.Width = -1
),
interaction_constraints = NULL,
feature_weights = NULL,
lst_args = list(),
use_qdm = FALSE
)
})
expect_error({
process.x.and.col.args(
iris,
monotone_constraints = rep(0, 6),
interaction_constraints = NULL,
feature_weights = NULL,
lst_args = list(),
use_qdm = FALSE
)
})
})
test_that("Process interaction_constraints", {
data(iris)
res <- process.x.and.col.args(iris, NULL, list(c(1L, 2L)), NULL, NULL, FALSE)
expect_equal(
res$params$interaction_constraints,
list(c(0, 1))
)
res <- process.x.and.col.args(iris, NULL, list(c(1.0, 2.0)), NULL, NULL, FALSE)
expect_equal(
res$params$interaction_constraints,
list(c(0, 1))
)
res <- process.x.and.col.args(iris, NULL, list(c(1, 2), c(3, 4)), NULL, NULL, FALSE)
expect_equal(
res$params$interaction_constraints,
list(c(0, 1), c(2, 3))
)
res <- process.x.and.col.args(
iris, NULL, list(c("Sepal.Length", "Sepal.Width")), NULL, NULL, FALSE
)
expect_equal(
res$params$interaction_constraints,
list(c(0, 1))
)
res <- process.x.and.col.args(
as.matrix(iris),
NULL,
list(c("Sepal.Length", "Sepal.Width")),
NULL,
NULL,
FALSE
)
expect_equal(
res$params$interaction_constraints,
list(c(0, 1))
)
res <- process.x.and.col.args(
iris,
NULL,
list(c("Sepal.Width", "Petal.Length"), c("Sepal.Length", "Petal.Width", "Species")),
NULL,
NULL,
FALSE
)
expect_equal(
res$params$interaction_constraints,
list(c(1, 2), c(0, 3, 4))
)
expect_error({
process.x.and.col.args(iris, NULL, list(c(1L, 20L)), NULL, NULL, FALSE)
})
expect_error({
process.x.and.col.args(iris, NULL, list(c(0L, 2L)), NULL, NULL, FALSE)
})
expect_error({
process.x.and.col.args(iris, NULL, list(c("1", "2")), NULL, NULL, FALSE)
})
expect_error({
process.x.and.col.args(iris, NULL, list(c("Sepal", "Petal")), NULL, NULL, FALSE)
})
expect_error({
process.x.and.col.args(iris, NULL, c(1L, 2L), NULL, NULL, FALSE)
})
expect_error({
process.x.and.col.args(iris, NULL, matrix(c(1L, 2L)), NULL, NULL, FALSE)
})
expect_error({
process.x.and.col.args(iris, NULL, list(c(1, 2.5)), NULL, NULL, FALSE)
})
})
test_that("Sparse matrices are casted to CSR for QDM", {
data(agaricus.test, package = "xgboost")
x <- agaricus.test$data
for (x_in in list(x, methods::as(x, "TsparseMatrix"))) {
res <- process.x.and.col.args(
x_in,
NULL,
NULL,
NULL,
NULL,
TRUE
)
expect_s4_class(res$dmatrix_args$data, "dgRMatrix")
}
})
test_that("Process feature_weights", {
data(iris)
w_vector <- seq(1, 5)
res <- process.x.and.col.args(
iris,
monotone_constraints = NULL,
interaction_constraints = NULL,
feature_weights = w_vector,
lst_args = list(),
use_qdm = FALSE
)
expect_equal(
res$dmatrix_args$feature_weights,
seq(1, 5)
)
w_named_vector <- seq(1, 5)
names(w_named_vector) <- rev(names(iris))
res <- process.x.and.col.args(
iris,
monotone_constraints = NULL,
interaction_constraints = NULL,
feature_weights = w_named_vector,
lst_args = list(),
use_qdm = FALSE
)
expect_equal(
res$dmatrix_args$feature_weights,
rev(seq(1, 5))
)
w_list <- list(
Species = 5,
Sepal.Length = 1,
Sepal.Width = 2,
Petal.Length = 3,
Petal.Width = 4
)
res <- process.x.and.col.args(
iris,
monotone_constraints = NULL,
interaction_constraints = NULL,
feature_weights = w_list,
lst_args = list(),
use_qdm = FALSE
)
expect_equal(
res$dmatrix_args$feature_weights,
seq(1, 5)
)
})
test_that("Whole function works", {
data(cancer, package = "survival")
y <- Surv(cancer$time, cancer$status - 1, type = "right")
x <- as.data.table(cancer)[, -c("time", "status")]
model <- xgboost(
x,
y,
monotone_constraints = list(age = -1),
nthreads = 1L,
nrounds = 5L,
eta = 3
)
expect_equal(
attributes(model)$params$objective,
"survival:aft"
)
expect_equal(
attributes(model)$metadata$n_targets,
1L
)
expect_equal(
attributes(model)$params$monotone_constraints,
"(0,-1,0,0,0,0,0,0)"
)
expect_false(
"interaction_constraints" %in% names(attributes(model)$params)
)
expect_equal(
attributes(model)$params$eta,
3
)
txt <- capture.output({
print(model)
})
expect_true(any(grepl("Objective: survival:aft", txt, fixed = TRUE)))
expect_true(any(grepl("monotone_constraints", txt, fixed = TRUE)))
expect_true(any(grepl("Number of iterations: 5", txt, fixed = TRUE)))
expect_true(any(grepl("Number of features: 8", txt, fixed = TRUE)))
})


@@ -173,8 +173,9 @@ Build the model
 The code below is very usual. For more information, you can look at the documentation of `xgboost` function (or at the vignette [XGBoost presentation](https://github.com/dmlc/xgboost/blob/master/R-package/vignettes/xgboostPresentation.Rmd)).
 
 ```{r}
-bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4,
-               eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic")
+bst <- xgboost(x = sparse_matrix, y = output_vector,
+               params = list(max_depth = 4, eta = 1),
+               nthread = 2, nrounds = 10)
 ```
@@ -299,28 +300,28 @@ test <- agaricus.test
 #Random Forest - 1000 trees
 bst <- xgboost(
-  data = train$data,
-  label = train$label,
-  max_depth = 4,
-  num_parallel_tree = 1000,
-  subsample = 0.5,
-  colsample_bytree = 0.5,
+  x = train$data,
+  y = factor(train$label, levels = c(0, 1)),
+  params = list(
+    max_depth = 4,
+    num_parallel_tree = 1000,
+    subsample = 0.5,
+    colsample_bytree = 0.5
+  ),
   nrounds = 1,
-  objective = "binary:logistic",
   nthread = 2
 )
 
 #Boosting - 3 rounds
 bst <- xgboost(
-  data = train$data,
-  label = train$label,
-  max_depth = 4,
+  x = train$data,
+  y = factor(train$label, levels = c(0, 1)),
+  params = list(max_depth = 4),
   nrounds = 3,
-  objective = "binary:logistic",
   nthread = 2
 )
 ```
 
-> Note that the parameter `round` is set to `1`.
+> Note that the parameter `nrounds` is set to `1`.
 > [**Random Forests**](https://www.stat.berkeley.edu/~breiman/RandomForests/cc_papers.htm) is a trademark of Leo Breiman and Adele Cutler and is licensed exclusively to Salford Systems for the commercial release of the software.


@@ -146,22 +146,19 @@ In a *sparse* matrix, cells containing `0` are not stored in memory. Therefore,
 We will train decision tree model using the following parameters:
 
-* `objective = "binary:logistic"`: we will train a binary classification model ;
+* `objective = "binary:logistic"`: we will train a binary classification model (note that this is set automatically when `y` is a `factor`) ;
 * `max_depth = 2`: the trees won't be deep, because our case is very simple ;
 * `nthread = 2`: the number of CPU threads we are going to use;
 * `nrounds = 2`: there will be two passes on the data, the second one will enhance the model by further reducing the difference between ground truth and prediction.
 
 ```{r trainingSparse, message=F, warning=F}
 bstSparse <- xgboost(
-  data = train$data
-  , label = train$label
-  , params = list(
-    max_depth = 2
-    , eta = 1
-    , nthread = 2
-    , objective = "binary:logistic"
-  )
+  x = train$data
+  , y = factor(train$label, levels = c(0, 1))
+  , objective = "binary:logistic"
+  , params = list(max_depth = 2, eta = 1)
   , nrounds = 2
+  , nthread = 2
 )
 ```
@@ -175,15 +172,11 @@ Alternatively, you can put your dataset in a *dense* matrix, i.e. a basic **R**
 ```{r trainingDense, message=F, warning=F}
 bstDense <- xgboost(
-  data = as.matrix(train$data),
-  label = train$label,
-  params = list(
-    max_depth = 2,
-    eta = 1,
-    nthread = 2,
-    objective = "binary:logistic"
-  ),
-  nrounds = 2
+  x = as.matrix(train$data),
+  y = factor(train$label, levels = c(0, 1)),
+  params = list(max_depth = 2, eta = 1),
+  nrounds = 2,
+  nthread = 2
 )
 ```
@@ -193,7 +186,7 @@ bstDense <- xgboost(
 ```{r trainingDmatrix, message=F, warning=F}
 dtrain <- xgb.DMatrix(data = train$data, label = train$label, nthread = 2)
-bstDMatrix <- xgboost(
+bstDMatrix <- xgb.train(
   data = dtrain,
   params = list(
     max_depth = 2,
@@ -213,7 +206,7 @@ One of the simplest way to see the training progress is to set the `verbose` opt
 ```{r trainingVerbose0, message=T, warning=F}
 # verbose = 0, no message
-bst <- xgboost(
+bst <- xgb.train(
   data = dtrain
   , params = list(
     max_depth = 2
@@ -228,7 +221,7 @@ bst <- xgboost(
 ```{r trainingVerbose1, message=T, warning=F}
 # verbose = 1, print evaluation metric
-bst <- xgboost(
+bst <- xgb.train(
   data = dtrain
   , params = list(
     max_depth = 2
@@ -243,7 +236,7 @@ bst <- xgboost(
 ```{r trainingVerbose2, message=T, warning=F}
 # verbose = 2, also print information about tree
-bst <- xgboost(
+bst <- xgb.train(
   data = dtrain
   , params = list(
     max_depth = 2


@@ -178,9 +178,10 @@ parameter:
 Using feature name instead
 **************************
 
-XGBoost's Python package supports using feature names instead of feature index for
+XGBoost's Python and R packages support using feature names instead of feature index for
 specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
-feature interaction constraint can be specified as ``[["f0", "f2"]]``.
+feature interaction constraint can be specified as ``[["f0", "f2"]]`` (Python) or
+``list(c("f0", "f2"))`` (R, when passing them to function ``xgboost()``).
 
 **************
 Advanced topic
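A minimal R sketch of that call (assuming a data frame ``df`` with columns ``f0``, ``f1``, ``f2`` and a response ``y``):

model <- xgboost(df, y, interaction_constraints = list(c("f0", "f2")),
                 nthreads = 1, nrounds = 10)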


@@ -97,7 +97,8 @@ Some other examples:
 Using feature names
 *******************
 
-XGBoost's Python package supports using feature names instead of feature index for
+XGBoost's Python and R packages support using feature names instead of feature indices for
 specifying the constraints. Given a data frame with columns ``["f0", "f1", "f2"]``, the
-monotonic constraint can be specified as ``{"f0": 1, "f2": -1}``, and ``"f1"`` will
+monotonic constraint can be specified as ``{"f0": 1, "f2": -1}`` (Python) or as
+``list(f0=1, f2=-1)`` (R, when using 'xgboost()', but not 'xgb.train'), and ``"f1"`` will
 default to ``0`` (no constraint).
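A corresponding R sketch (again assuming a data frame ``df`` with columns ``f0``, ``f1``, ``f2`` and a response ``y``):

model <- xgboost(df, y, monotone_constraints = list(f0 = 1, f2 = -1),
                 nthreads = 1, nrounds = 10)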