[R] remove 'reshape' argument, let shapes be handled by core cpp library (#10330)

david-cortes 2024-08-18 17:31:38 +02:00 committed by GitHub
parent fd365c147e
commit caabee2135
13 changed files with 239 additions and 248 deletions

View File

@@ -853,8 +853,7 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
       pr <- predict(
         fd$bst,
         fd$evals[[2L]],
-        outputmargin = env$outputmargin,
-        reshape = TRUE
+        outputmargin = env$outputmargin
       )
       if (is.null(pred)) {
         if (NCOL(pr) > 1L) {

View File

@@ -199,8 +199,7 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) {
     bst,
     dtrain,
     outputmargin = TRUE,
-    training = TRUE,
-    reshape = TRUE
+    training = TRUE
   )
   gpair <- obj(pred, dtrain)
   n_samples <- dim(dtrain)[1]
@@ -246,7 +245,7 @@ xgb.iter.eval <- function(bst, evals, iter, feval) {
   res <- sapply(seq_along(evals), function(j) {
     w <- evals[[j]]
     ## predict using all trees
-    preds <- predict(bst, w, outputmargin = TRUE, reshape = TRUE, iterationrange = "all")
+    preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
     eval_res <- feval(preds, w)
     out <- eval_res$value
     names(out) <- paste0(evnames[j], "-", eval_res$metric)
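A side effect of dropping `reshape` here is that the margin handed to a custom objective by `xgb.iter.update()` now arrives with whatever dimensions the core library reports - an `[nrows, ngroups]` matrix for multi-output models. A minimal sketch of an objective written against that layout (the function and the label reshaping are illustrative assumptions, not part of this commit):

    # Hypothetical multi-output squared-error objective: 'preds' comes in as an
    # [nrows, ngroups] matrix, so gradients can be computed element-wise.
    multi_squared_error <- function(preds, dtrain) {
      y <- matrix(getinfo(dtrain, "label"), nrow = nrow(preds))  # assumes labels align column-wise
      list(
        grad = preds - y,                                         # d/dmargin of 0.5 * (margin - y)^2
        hess = matrix(1, nrow = nrow(preds), ncol = ncol(preds))  # constant second derivative
      )
    }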

View File

@@ -112,9 +112,6 @@ xgb.get.handle <- function(object) {
 #' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
 #' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
 #' @param predinteraction Whether to return contributions of feature interactions to individual predictions (see Details).
-#' @param reshape Whether to reshape the vector of predictions to matrix form when there are several
-#'   prediction outputs per case. No effect if `predleaf`, `predcontrib`,
-#'   or `predinteraction` is `TRUE`.
 #' @param training Whether the prediction result is used for training. For dart booster,
 #'   training predicting will perform dropout.
 #' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
@@ -128,8 +125,24 @@ xgb.get.handle <- function(object) {
 #'   of the iterations (rounds) otherwise.
 #'
 #'   If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
-#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
-#'   type and shape of predictions are invariant to the model type.
+#' @param strict_shape Whether to always return an array with the same dimensions for the given prediction mode,
+#'   regardless of the model type - meaning that, for example, both a multi-class and a binary classification
+#'   model would generate output arrays with the same number of dimensions, with the 'class' dimension having
+#'   size equal to 1 for the binary model.
+#'
+#'   If passing `FALSE` (the default), dimensions will be simplified according to the model type, so that a
+#'   binary classification model, for example, would not have a redundant 'class' dimension.
+#'
+#'   See the documentation for the return type for the exact shape of the output arrays for each prediction mode.
+#' @param avoid_transpose Whether to output the resulting predictions in the same memory layout in which they
+#'   are generated by the core XGBoost library, without transposing them to match the expected output shape.
+#'
+#'   Internally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major
+#'   order, so the result needs to be transposed in order to have the expected shape when represented as
+#'   an R array or matrix, which might be a slow operation.
+#'
+#'   If passing `TRUE`, the result will have its dimensions in reverse order - for example, rows
+#'   will be the last dimension instead of the first.
 #' @param base_margin Base margin used for boosting from existing model.
 #'
 #'   Note that, if `newdata` is an `xgb.DMatrix` object, this argument will
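The interplay of the two new arguments is easiest to see on dimensions. A hedged sketch, assuming a fitted binary-classification booster `bst` and a numeric feature matrix `X` (neither is part of this diff):

    p        <- predict(bst, X)                         # vector of length nrow(X)
    p_strict <- predict(bst, X, strict_shape = TRUE)    # matrix [nrow(X), 1]
    p_raw    <- predict(bst, X, strict_shape = TRUE,
                        avoid_transpose = TRUE)         # matrix [1, nrow(X)]
    all.equal(unname(aperm(p_raw)), unname(p_strict))   # same data, reversed layout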
@@ -180,28 +193,46 @@ xgb.get.handle <- function(object) {
 #'   Note that converting a matrix to [xgb.DMatrix()] uses multiple threads too.
 #'
 #' @return
-#' The return type depends on `strict_shape`. If `FALSE` (default):
-#' - For regression or binary classification: A vector of length `nrows(newdata)`.
-#' - For multiclass classification: A vector of length `num_class * nrows(newdata)` or
-#'   a `(nrows(newdata), num_class)` matrix, depending on the `reshape` value.
-#' - When `predleaf = TRUE`: A matrix with one column per tree.
-#' - When `predcontrib = TRUE`: When not multiclass, a matrix with
-#'   ` num_features + 1` columns. The last "+ 1" column corresponds to the baseline value.
-#'   In the multiclass case, a list of `num_class` such matrices.
-#'   The contribution values are on the scale of untransformed margin
-#'   (e.g., for binary classification, the values are log-odds deviations from the baseline).
-#' - When `predinteraction = TRUE`: When not multiclass, the output is a 3d array of
-#'   dimension `c(nrow, num_features + 1, num_features + 1)`. The off-diagonal (in the last two dimensions)
-#'   elements represent different feature interaction contributions. The array is symmetric WRT the last
-#'   two dimensions. The "+ 1" columns corresponds to the baselines. Summing this array along the last dimension should
-#'   produce practically the same result as `predcontrib = TRUE`.
-#'   In the multiclass case, a list of `num_class` such arrays.
+#' A numeric vector or array, with corresponding dimensions depending on the prediction mode and on
+#' parameter `strict_shape` as follows:
 #'
-#' When `strict_shape = TRUE`, the output is always an array:
-#' - For normal predictions, the output has dimension `(num_class, nrow(newdata))`.
-#' - For `predcontrib = TRUE`, the dimension is `(ncol(newdata) + 1, num_class, nrow(newdata))`.
-#' - For `predinteraction = TRUE`, the dimension is `(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))`.
-#' - For `predleaf = TRUE`, the dimension is `(n_trees_in_forest, num_class, n_iterations, nrow(newdata))`.
+#' If passing `strict_shape=FALSE`:\itemize{
+#' \item For regression or binary classification: a vector of length `nrows`.
+#' \item For multi-class and multi-target objectives: a matrix of dimensions `[nrows, ngroups]`.
+#'
+#'   Note that the objective variant `multi:softmax` defaults to predicting the most likely class
+#'   (a vector of length `nrows`) instead of per-class probabilities.
+#' \item For `predleaf`: a matrix with one column per tree.
+#'
+#'   For multi-class / multi-target, they will be arranged so that columns in the output will have
+#'   the leaves from one group followed by leaves of the other group (e.g. order will be `group1:feat1`,
+#'   `group1:feat2`, ..., `group2:feat1`, `group2:feat2`, ...).
+#' \item For `predcontrib`: when not multi-class / multi-target, a matrix with dimensions
+#'   `[nrows, nfeats+1]`. The last "+ 1" column corresponds to the baseline value.
+#'
+#'   For multi-class and multi-target objectives, will be an array with dimensions `[nrows, ngroups, nfeats+1]`.
+#'
+#'   The contribution values are on the scale of untransformed margin (e.g., for binary classification,
+#'   the values are log-odds deviations from the baseline).
+#' \item For `predinteraction`: when not multi-class / multi-target, the output is a 3D array of
+#'   dimensions `[nrows, nfeats+1, nfeats+1]`. The off-diagonal (in the last two dimensions)
+#'   elements represent different feature interaction contributions. The array is symmetric w.r.t. the last
+#'   two dimensions. The "+ 1" column corresponds to the baseline. Summing this array along the last
+#'   dimension should produce practically the same result as `predcontrib = TRUE`.
+#'
+#'   For multi-class and multi-target, will be a 4D array with dimensions `[nrows, ngroups, nfeats+1, nfeats+1]`.
+#' }
+#'
+#' If passing `strict_shape=TRUE`, the result is always an array:\itemize{
+#' \item For normal predictions, the dimension is `[nrows, ngroups]`.
+#' \item For `predcontrib=TRUE`, the dimension is `[nrows, ngroups, nfeats+1]`.
+#' \item For `predinteraction=TRUE`, the dimension is `[nrows, ngroups, nfeats+1, nfeats+1]`.
+#' \item For `predleaf=TRUE`, the dimension is `[nrows, niter, ngroups, num_parallel_tree]`.
+#' }
+#'
+#' If passing `avoid_transpose=TRUE`, the dimensions in all cases will be in reverse order - for
+#' example, for `predinteraction`, they will be `[nfeats+1, nfeats+1, ngroups, nrows]`
+#' instead of `[nrows, ngroups, nfeats+1, nfeats+1]`.
 #' @seealso [xgb.train()]
 #' @references
 #' 1. Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions",
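To make the documented shapes concrete, a short usage sketch (a fitted multi-class booster `mbst` and feature matrix `X` are assumed here; they do not appear in the commit):

    contr  <- predict(mbst, X, predcontrib = TRUE)    # array [nrows, ngroups, nfeats+1]
    margin <- predict(mbst, X, outputmargin = TRUE)   # matrix [nrows, ngroups]
    # per-class contributions should sum (approximately) to the per-class margin:
    max(abs(apply(contr, c(1, 2), sum) - margin))     # ~ 0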
@@ -279,8 +310,6 @@ xgb.get.handle <- function(object) {
 #' # predict for softmax returns num_class probability numbers per case:
 #' pred <- predict(bst, as.matrix(iris[, -5]))
 #' str(pred)
-#' # reshape it to a num_class-columns matrix
-#' pred <- matrix(pred, ncol = num_class, byrow = TRUE)
 #' # convert the probabilities to softmax labels
 #' pred_labels <- max.col(pred) - 1
 #' # the following should result in the same error as seen in the last iteration
@@ -311,8 +340,11 @@ xgb.get.handle <- function(object) {
 #' @export
 predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
                                 predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
-                                reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE,
+                                training = FALSE, iterationrange = NULL, strict_shape = FALSE, avoid_transpose = FALSE,
                                 validate_features = FALSE, base_margin = NULL, ...) {
+  if (NROW(list(...))) {
+    warning("Passed unused prediction arguments: ", paste(names(list(...)), collapse = ", "), ".")
+  }
   if (validate_features) {
     newdata <- validate.features(object, newdata)
   }
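Because `reshape` is no longer a formal argument, old call sites fall through to `...` and trigger the warning added above. An illustrative call (`bst` and `X` assumed as before; the printed text paraphrases the warning):

    pred <- predict(bst, X, reshape = TRUE)
    #> Warning: Passed unused prediction arguments: reshape.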
@@ -415,10 +447,9 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
     return(val)
   }
 
-  ## We set strict_shape to TRUE then drop the dimensions conditionally
   args <- list(
     training = box(training),
-    strict_shape = box(TRUE),
+    strict_shape = as.logical(strict_shape),
     iteration_begin = box(as.integer(iterationrange[1])),
     iteration_end = box(as.integer(iterationrange[2])),
     type = box(as.integer(0))
@@ -445,96 +476,36 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
   json_conf <- jsonlite::toJSON(args, auto_unbox = TRUE)
   if (is_dmatrix) {
-    predts <- .Call(
+    arr <- .Call(
       XGBoosterPredictFromDMatrix_R, xgb.get.handle(object), newdata, json_conf
     )
   } else if (use_as_dense_matrix) {
-    predts <- .Call(
+    arr <- .Call(
       XGBoosterPredictFromDense_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
     )
   } else if (use_as_csr_matrix) {
-    predts <- .Call(
+    arr <- .Call(
       XGBoosterPredictFromCSR_R, xgb.get.handle(object), csr_data, missing, json_conf, base_margin
     )
   } else if (use_as_df) {
-    predts <- .Call(
+    arr <- .Call(
       XGBoosterPredictFromColumnar_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
     )
   }
-  names(predts) <- c("shape", "results")
-  shape <- predts$shape
-  arr <- predts$results
-  n_ret <- length(arr)
-  if (n_row != shape[1]) {
-    stop("Incorrect predict shape.")
-  }
-  .Call(XGSetArrayDimInplace_R, arr, rev(shape))
-  cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "(Intercept)") else NULL
-  n_groups <- shape[2]
 
   ## Needed regardless of whether strict shape is being used.
-  if (predcontrib) {
-    .Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, NULL, NULL))
-  } else if (predinteraction) {
-    .Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, cnames, NULL, NULL))
-  }
-  if (strict_shape) {
-    return(arr) # strict shape is calculated by libxgboost uniformly.
-  }
-  if (predleaf) {
-    ## Predict leaf
-    if (n_ret == n_row) {
-      .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
-    } else {
-      arr <- matrix(arr, nrow = n_row, byrow = TRUE)
-    }
-  } else if (predcontrib) {
-    ## Predict contribution
-    arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
-    if (n_ret == n_row) {
-      .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
-      .Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
-    } else if (n_groups != 1) {
-      ## turns array into list of matrices
-      arr <- lapply(seq_len(n_groups), function(g) arr[g, , ])
-    } else {
-      ## remove the first axis (group)
-      newdim <- dim(arr)[2:3]
-      newdn <- dimnames(arr)[2:3]
-      arr <- arr[1, , ]
-      .Call(XGSetArrayDimInplace_R, arr, newdim)
-      .Call(XGSetArrayDimNamesInplace_R, arr, newdn)
-    }
-  } else if (predinteraction) {
-    ## Predict interaction
-    arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
-    if (n_ret == n_row) {
-      .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
-      .Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
-    } else if (n_groups != 1) {
-      ## turns array into list of matrices
-      arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ])
-    } else {
-      ## remove the first axis (group)
-      arr <- arr[1, , , , drop = FALSE]
-      newdim <- dim(arr)[2:4]
-      newdn <- dimnames(arr)[2:4]
-      .Call(XGSetArrayDimInplace_R, arr, newdim)
-      .Call(XGSetArrayDimNamesInplace_R, arr, newdn)
-    }
-  } else {
-    ## Normal prediction
-    if (reshape && n_groups != 1) {
-      arr <- matrix(arr, ncol = n_groups, byrow = TRUE)
-    } else {
-      .Call(XGSetArrayDimInplace_R, arr, NULL)
-    }
-  }
+  if ((predcontrib || predinteraction) && !is.null(colnames(newdata))) {
+    cnames <- c(colnames(newdata), "(Intercept)")
+    dim_names <- vector(mode = "list", length = length(dim(arr)))
+    dim_names[[1L]] <- cnames
+    if (predinteraction) dim_names[[2L]] <- cnames
+    .Call(XGSetArrayDimNamesInplace_R, arr, dim_names)
+  }
+  if (!avoid_transpose && is.array(arr)) {
+    arr <- aperm(arr)
+  }
   return(arr)
 }
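The final `aperm()` is now the only place where data gets rearranged, which makes the two layouts easy to relate. A hedged check, again assuming `bst` and `X`:

    a <- predict(bst, X, predcontrib = TRUE, avoid_transpose = TRUE)  # [nfeats+1, nrows]
    b <- predict(bst, X, predcontrib = TRUE)                          # [nrows, nfeats+1]
    all.equal(aperm(a), b)  # one transpose recovers the default layout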

View File

@@ -294,8 +294,10 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
   if (is.null(features) && (is.null(model) || !inherits(model, "xgb.Booster")))
     stop("when features are not provided, one must provide an xgb.Booster model to rank the features")
 
+  last_dim <- function(v) dim(v)[length(dim(v))]
+
   if (!is.null(shap_contrib) &&
-      (!is.matrix(shap_contrib) || nrow(shap_contrib) != nrow(data) || ncol(shap_contrib) != ncol(data) + 1))
+      (!is.array(shap_contrib) || nrow(shap_contrib) != nrow(data) || last_dim(shap_contrib) != ncol(data) + 1))
     stop("shap_contrib is not compatible with the provided data")
 
   if (is.character(features) && is.null(colnames(data)))
@@ -318,20 +320,40 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
     colnames(data) <- paste0("X", seq_len(ncol(data)))
   }
 
-  if (!is.null(shap_contrib)) {
-    if (is.list(shap_contrib)) { # multiclass: either choose a class or merge
-      shap_contrib <- if (!is.null(target_class)) shap_contrib[[target_class + 1]] else Reduce("+", lapply(shap_contrib, abs))
-    }
-    shap_contrib <- shap_contrib[idx, ]
-    if (is.null(colnames(shap_contrib))) {
-      colnames(shap_contrib) <- paste0("X", seq_len(ncol(data)))
-    }
-  } else {
-    shap_contrib <- predict(model, newdata = data, predcontrib = TRUE, approxcontrib = approxcontrib)
-    if (is.list(shap_contrib)) { # multiclass: either choose a class or merge
-      shap_contrib <- if (!is.null(target_class)) shap_contrib[[target_class + 1]] else Reduce("+", lapply(shap_contrib, abs))
-    }
-  }
+  reshape_3d_shap_contrib <- function(shap_contrib, target_class) {
+    # multiclass: either choose a class or merge
+    if (is.list(shap_contrib)) {
+      if (!is.null(target_class)) {
+        shap_contrib <- shap_contrib[[target_class + 1]]
+      } else {
+        shap_contrib <- Reduce("+", lapply(shap_contrib, abs))
+      }
+    } else if (length(dim(shap_contrib)) > 2) {
+      if (!is.null(target_class)) {
+        orig_shape <- dim(shap_contrib)
+        shap_contrib <- shap_contrib[, target_class + 1, , drop = TRUE]
+        if (!is.matrix(shap_contrib)) {
+          shap_contrib <- matrix(shap_contrib, orig_shape[c(1L, 3L)])
+        }
+      } else {
+        shap_contrib <- apply(abs(shap_contrib), c(1L, 3L), sum)
+      }
+    }
+    return(shap_contrib)
+  }
+
+  if (is.null(shap_contrib)) {
+    shap_contrib <- predict(
+      model,
+      newdata = data,
+      predcontrib = TRUE,
+      approxcontrib = approxcontrib
+    )
+  }
+  shap_contrib <- reshape_3d_shap_contrib(shap_contrib, target_class)
+  if (is.null(colnames(shap_contrib))) {
+    colnames(shap_contrib) <- paste0("X", seq_len(ncol(data)))
+  }
 
   if (is.null(features)) {
     if (!is.null(model_feature_names)) {
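The reduction done by `reshape_3d_shap_contrib()` can be reproduced by hand on the array that `predict()` now returns. A sketch, with `contr` standing in for a `[nrows, ngroups, nfeats+1]` contributions array (an assumed name, not from the diff):

    # merge classes by summing absolute contributions per feature:
    merged <- apply(abs(contr), c(1L, 3L), sum)   # -> matrix [nrows, nfeats+1]
    # or select a single class while keeping the matrix shape:
    class1 <- contr[, 1L, , drop = TRUE]          # -> matrix [nrows, nfeats+1]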

View File

@@ -13,10 +13,10 @@
   predcontrib = FALSE,
   approxcontrib = FALSE,
   predinteraction = FALSE,
-  reshape = FALSE,
   training = FALSE,
   iterationrange = NULL,
   strict_shape = FALSE,
+  avoid_transpose = FALSE,
   validate_features = FALSE,
   base_margin = NULL,
   ...
@@ -66,10 +66,6 @@ logistic regression would return log-odds instead of probabilities.}
 \item{predinteraction}{Whether to return contributions of feature interactions to individual predictions (see Details).}
 
-\item{reshape}{Whether to reshape the vector of predictions to matrix form when there are several
-prediction outputs per case. No effect if \code{predleaf}, \code{predcontrib},
-or \code{predinteraction} is \code{TRUE}.}
-
 \item{training}{Whether the prediction result is used for training. For dart booster,
 training predicting will perform dropout.}
@@ -86,8 +82,27 @@ base-1 indexing, and inclusive of both ends).
 If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
 }\if{html}{\out{</div>}}}
 
-\item{strict_shape}{Default is \code{FALSE}. When set to \code{TRUE}, the output
-type and shape of predictions are invariant to the model type.}
+\item{strict_shape}{Whether to always return an array with the same dimensions for the given prediction mode,
+regardless of the model type - meaning that, for example, both a multi-class and a binary classification
+model would generate output arrays with the same number of dimensions, with the 'class' dimension having
+size equal to 1 for the binary model.
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{If passing `FALSE` (the default), dimensions will be simplified according to the model type, so that a
+binary classification model, for example, would not have a redundant 'class' dimension.
+
+See the documentation for the return type for the exact shape of the output arrays for each prediction mode.
+}\if{html}{\out{</div>}}}
+
+\item{avoid_transpose}{Whether to output the resulting predictions in the same memory layout in which they
+are generated by the core XGBoost library, without transposing them to match the expected output shape.
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{Internally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major
+order, so the result needs to be transposed in order to have the expected shape when represented as
+an R array or matrix, which might be a slow operation.
+
+If passing `TRUE`, the result will have its dimensions in reverse order - for example, rows
+will be the last dimension instead of the first.
+}\if{html}{\out{</div>}}}
 
 \item{validate_features}{When \code{TRUE}, validate that the Booster's and newdata's feature_names
 match (only applicable when both \code{object} and \code{newdata} have feature names).
@@ -116,32 +131,46 @@ match (only applicable when both \code{object} and \code{newdata} have feature names).
 \item{...}{Not used.}
 }
 \value{
-The return type depends on \code{strict_shape}. If \code{FALSE} (default):
-\itemize{
-\item For regression or binary classification: A vector of length \code{nrows(newdata)}.
-\item For multiclass classification: A vector of length \code{num_class * nrows(newdata)} or
-a \verb{(nrows(newdata), num_class)} matrix, depending on the \code{reshape} value.
-\item When \code{predleaf = TRUE}: A matrix with one column per tree.
-\item When \code{predcontrib = TRUE}: When not multiclass, a matrix with
-\code{ num_features + 1} columns. The last "+ 1" column corresponds to the baseline value.
-In the multiclass case, a list of \code{num_class} such matrices.
-The contribution values are on the scale of untransformed margin
-(e.g., for binary classification, the values are log-odds deviations from the baseline).
-\item When \code{predinteraction = TRUE}: When not multiclass, the output is a 3d array of
-dimension \code{c(nrow, num_features + 1, num_features + 1)}. The off-diagonal (in the last two dimensions)
-elements represent different feature interaction contributions. The array is symmetric WRT the last
-two dimensions. The "+ 1" columns corresponds to the baselines. Summing this array along the last dimension should
-produce practically the same result as \code{predcontrib = TRUE}.
-In the multiclass case, a list of \code{num_class} such arrays.
-}
-
-When \code{strict_shape = TRUE}, the output is always an array:
-\itemize{
-\item For normal predictions, the output has dimension \verb{(num_class, nrow(newdata))}.
-\item For \code{predcontrib = TRUE}, the dimension is \verb{(ncol(newdata) + 1, num_class, nrow(newdata))}.
-\item For \code{predinteraction = TRUE}, the dimension is \verb{(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))}.
-\item For \code{predleaf = TRUE}, the dimension is \verb{(n_trees_in_forest, num_class, n_iterations, nrow(newdata))}.
-}
+A numeric vector or array, with corresponding dimensions depending on the prediction mode and on
+parameter \code{strict_shape} as follows:
+
+If passing \code{strict_shape=FALSE}:\itemize{
+\item For regression or binary classification: a vector of length \code{nrows}.
+\item For multi-class and multi-target objectives: a matrix of dimensions \verb{[nrows, ngroups]}.
+
+Note that the objective variant \code{multi:softmax} defaults to predicting the most likely class
+(a vector of length \code{nrows}) instead of per-class probabilities.
+\item For \code{predleaf}: a matrix with one column per tree.
+
+For multi-class / multi-target, they will be arranged so that columns in the output will have
+the leaves from one group followed by leaves of the other group (e.g. order will be \code{group1:feat1},
+\code{group1:feat2}, ..., \code{group2:feat1}, \code{group2:feat2}, ...).
+\item For \code{predcontrib}: when not multi-class / multi-target, a matrix with dimensions
+\verb{[nrows, nfeats+1]}. The last "+ 1" column corresponds to the baseline value.
+
+For multi-class and multi-target objectives, will be an array with dimensions \verb{[nrows, ngroups, nfeats+1]}.
+
+The contribution values are on the scale of untransformed margin (e.g., for binary classification,
+the values are log-odds deviations from the baseline).
+\item For \code{predinteraction}: when not multi-class / multi-target, the output is a 3D array of
+dimensions \verb{[nrows, nfeats+1, nfeats+1]}. The off-diagonal (in the last two dimensions)
+elements represent different feature interaction contributions. The array is symmetric w.r.t. the last
+two dimensions. The "+ 1" column corresponds to the baseline. Summing this array along the last
+dimension should produce practically the same result as \code{predcontrib = TRUE}.
+
+For multi-class and multi-target, will be a 4D array with dimensions \verb{[nrows, ngroups, nfeats+1, nfeats+1]}.
+}
+
+If passing \code{strict_shape=TRUE}, the result is always an array:\itemize{
+\item For normal predictions, the dimension is \verb{[nrows, ngroups]}.
+\item For \code{predcontrib=TRUE}, the dimension is \verb{[nrows, ngroups, nfeats+1]}.
+\item For \code{predinteraction=TRUE}, the dimension is \verb{[nrows, ngroups, nfeats+1, nfeats+1]}.
+\item For \code{predleaf=TRUE}, the dimension is \verb{[nrows, niter, ngroups, num_parallel_tree]}.
+}
+
+If passing \code{avoid_transpose=TRUE}, the dimensions in all cases will be in reverse order - for
+example, for \code{predinteraction}, they will be \verb{[nfeats+1, nfeats+1, ngroups, nrows]}
+instead of \verb{[nrows, ngroups, nfeats+1, nfeats+1]}.
 }
 \description{
 Predict values on data based on xgboost model.
@@ -241,8 +270,6 @@ bst <- xgb.train(
 # predict for softmax returns num_class probability numbers per case:
 pred <- predict(bst, as.matrix(iris[, -5]))
 str(pred)
-# reshape it to a num_class-columns matrix
-pred <- matrix(pred, ncol = num_class, byrow = TRUE)
 # convert the probabilities to softmax labels
 pred_labels <- max.col(pred) - 1
 # the following should result in the same error as seen in the last iteration

View File

@@ -45,7 +45,6 @@ extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);
 extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);
 extern SEXP XGBoosterUpdateOneIter_R(SEXP, SEXP, SEXP);
 extern SEXP XGCheckNullPtr_R(SEXP);
-extern SEXP XGSetArrayDimInplace_R(SEXP, SEXP);
 extern SEXP XGSetArrayDimNamesInplace_R(SEXP, SEXP);
 extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
 extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
@@ -108,7 +107,6 @@ static const R_CallMethodDef CallEntries[] = {
   {"XGBoosterSetParam_R", (DL_FUNC) &XGBoosterSetParam_R, 3},
   {"XGBoosterUpdateOneIter_R", (DL_FUNC) &XGBoosterUpdateOneIter_R, 3},
   {"XGCheckNullPtr_R", (DL_FUNC) &XGCheckNullPtr_R, 1},
-  {"XGSetArrayDimInplace_R", (DL_FUNC) &XGSetArrayDimInplace_R, 2},
   {"XGSetArrayDimNamesInplace_R", (DL_FUNC) &XGSetArrayDimNamesInplace_R, 2},
   {"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 6},
   {"XGDMatrixCreateFromCSR_R", (DL_FUNC) &XGDMatrixCreateFromCSR_R, 6},

View File

@@ -330,11 +330,6 @@ XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle) {
   return Rf_ScalarLogical(R_ExternalPtrAddr(handle) == nullptr);
 }
 
-XGB_DLL SEXP XGSetArrayDimInplace_R(SEXP arr, SEXP dims) {
-  Rf_setAttrib(arr, R_DimSymbol, dims);
-  return R_NilValue;
-}
-
 XGB_DLL SEXP XGSetArrayDimNamesInplace_R(SEXP arr, SEXP dim_names) {
   Rf_setAttrib(arr, R_DimNamesSymbol, dim_names);
   return R_NilValue;
@@ -1301,12 +1296,9 @@ enum class PredictionInputType {DMatrix, DenseMatrix, CSRMatrix, DataFrame};
 SEXP XGBoosterPredictGeneric(SEXP handle, SEXP input_data, SEXP json_config,
                              PredictionInputType input_type, SEXP missing,
                              SEXP base_margin) {
-  SEXP r_out_shape;
-  SEXP r_out_result;
-  SEXP r_out = Rf_protect(Rf_allocVector(VECSXP, 2));
-  SEXP json_config_ = Rf_protect(Rf_asChar(json_config));
+  SEXP r_out_result = R_NilValue;
   R_API_BEGIN();
+  SEXP json_config_ = Rf_protect(Rf_asChar(json_config));
   char const *c_json_config = CHAR(json_config_);
   bst_ulong out_dim;
@@ -1386,23 +1378,24 @@ SEXP XGBoosterPredictGeneric(SEXP handle, SEXP input_data, SEXP json_config,
   }
   CHECK_CALL(res_code);
 
-  r_out_shape = Rf_protect(Rf_allocVector(INTSXP, out_dim));
+  SEXP r_out_shape = Rf_protect(Rf_allocVector(INTSXP, out_dim));
   size_t len = 1;
   int *r_out_shape_ = INTEGER(r_out_shape);
   for (size_t i = 0; i < out_dim; ++i) {
-    r_out_shape_[i] = out_shape[i];
+    r_out_shape_[out_dim - i - 1] = out_shape[i];
     len *= out_shape[i];
   }
   r_out_result = Rf_protect(Rf_allocVector(REALSXP, len));
   std::copy(out_result, out_result + len, REAL(r_out_result));
 
-  SET_VECTOR_ELT(r_out, 0, r_out_shape);
-  SET_VECTOR_ELT(r_out, 1, r_out_result);
+  if (out_dim > 1) {
+    Rf_setAttrib(r_out_result, R_DimSymbol, r_out_shape);
+  }
 
   R_API_END();
-  Rf_unprotect(4);
+  Rf_unprotect(3);
 
-  return r_out;
+  return r_out_result;
 }
 }  // namespace

View File

@@ -26,14 +26,6 @@
  */
 XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle);
 
-/*!
- * \brief set the dimensions of an array in-place
- * \param arr
- * \param dims dimensions to set to the array
- * \return NULL value
- */
-XGB_DLL SEXP XGSetArrayDimInplace_R(SEXP arr, SEXP dims);
-
 /*!
  * \brief set the names of the dimensions of an array in-place
  * \param arr

View File

@@ -162,20 +162,20 @@ test_that("train and predict softprob", {
   pred <- predict(bst, as.matrix(iris[, -5]))
   expect_length(pred, nrow(iris) * 3)
   # row sums add up to total probability of 1:
-  expect_equal(rowSums(matrix(pred, ncol = 3, byrow = TRUE)), rep(1, nrow(iris)), tolerance = 1e-7)
+  expect_equal(rowSums(pred), rep(1, nrow(iris)), tolerance = 1e-7)
   # manually calculate error at the last iteration:
-  mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
-  expect_equal(as.numeric(t(mpred)), pred)
+  mpred <- predict(bst, as.matrix(iris[, -5]))
+  expect_equal(mpred, pred)
   pred_labels <- max.col(mpred) - 1
   err <- sum(pred_labels != lb) / length(lb)
   expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6)
   # manually calculate error at the 1st iteration:
-  mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
+  mpred <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 1))
   pred_labels <- max.col(mpred) - 1
   err <- sum(pred_labels != lb) / length(lb)
   expect_equal(attributes(bst)$evaluation_log[1, train_merror], err, tolerance = 5e-6)
 
-  mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
+  mpred1 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 1))
   expect_equal(mpred, mpred1)
 
   d <- cbind(
@@ -190,7 +190,7 @@ test_that("train and predict softprob", {
     data = dtrain, nrounds = 4, num_class = 10,
     objective = "multi:softprob"
   )
-  predt <- predict(booster, as.matrix(d), reshape = TRUE, strict_shape = FALSE)
+  predt <- predict(booster, as.matrix(d), strict_shape = FALSE)
   expect_equal(ncol(predt), 10)
   expect_equal(rowSums(predt), rep(1, 100), tolerance = 1e-7)
 })
@@ -254,13 +254,13 @@ test_that("train and predict RF with softprob", {
   )
   expect_equal(xgb.get.num.boosted.rounds(bst), 15)
   # predict for all iterations:
-  pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
+  pred <- predict(bst, as.matrix(iris[, -5]))
   expect_equal(dim(pred), c(nrow(iris), 3))
   pred_labels <- max.col(pred) - 1
   err <- sum(pred_labels != lb) / length(lb)
   expect_equal(attributes(bst)$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
   # predict for 7 iterations and adjust for 4 parallel trees per iteration
-  pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 7))
+  pred <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 7))
   err <- sum((max.col(pred) - 1) != lb) / length(lb)
   expect_equal(attributes(bst)$evaluation_log[7, train_merror], err, tolerance = 5e-6)
 })
@@ -485,15 +485,25 @@ test_that("strict_shape works", {
     n_rows <- nrow(X)
     n_cols <- ncol(X)
 
-    expect_equal(dim(predt), c(n_groups, n_rows))
-    expect_equal(dim(margin), c(n_groups, n_rows))
-    expect_equal(dim(contri), c(n_cols + 1, n_groups, n_rows))
-    expect_equal(dim(interact), c(n_cols + 1, n_cols + 1, n_groups, n_rows))
-    expect_equal(dim(leaf), c(1, n_groups, n_rounds, n_rows))
+    expect_equal(dim(predt), c(n_rows, n_groups))
+    expect_equal(dim(margin), c(n_rows, n_groups))
+    expect_equal(dim(contri), c(n_rows, n_groups, n_cols + 1))
+    expect_equal(dim(interact), c(n_rows, n_groups, n_cols + 1, n_cols + 1))
+    expect_equal(dim(leaf), c(n_rows, n_rounds, n_groups, 1))
 
     if (n_groups != 1) {
       for (g in seq_len(n_groups)) {
-        expect_lt(max(abs(colSums(contri[, g, ]) - margin[g, ])), 1e-5)
+        expect_lt(max(abs(rowSums(contri[, g, ]) - margin[, g])), 1e-5)
+      }
+
+      leaf_no_strict <- predict(bst, X, strict_shape = FALSE, predleaf = TRUE)
+      for (g in seq_len(n_groups)) {
+        g_mask <- rep(FALSE, n_groups)
+        g_mask[g] <- TRUE
+        expect_equal(
+          leaf[, , g, 1L],
+          leaf_no_strict[, g_mask]
+        )
       }
     }
   }
@@ -562,7 +572,7 @@ test_that("Quantile regression accepts multiple quantiles", {
     ),
     nrounds = 15
   )
-  pred <- predict(model, x, reshape = TRUE)
+  pred <- predict(model, x)
 
   expect_equal(dim(pred)[1], nrow(x))
   expect_equal(dim(pred)[2], 3)
@@ -590,7 +600,7 @@ test_that("Can use multi-output labels with built-in objectives", {
     data = dm,
     nrounds = 5
   )
-  pred <- predict(model, x, reshape = TRUE)
+  pred <- predict(model, x)
   expect_equal(pred[, 1], -pred[, 2])
   expect_true(cor(y, pred[, 1]) > 0.9)
   expect_true(cor(y, pred[, 2]) < -0.9)
@@ -619,7 +629,7 @@ test_that("Can use multi-output labels with custom objectives", {
     data = dm,
     nrounds = 5
   )
-  pred <- predict(model, x, reshape = TRUE)
+  pred <- predict(model, x)
   expect_equal(pred[, 1], -pred[, 2])
   expect_true(cor(y, pred[, 1]) > 0.9)
   expect_true(cor(y, pred[, 2]) < -0.9)
@@ -666,8 +676,8 @@ test_that("Can predict on data.frame objects", {
     nrounds = 5
   )
 
-  pred_mat <- predict(model, xgb.DMatrix(x_mat), nthread = n_threads)
-  pred_df <- predict(model, x_df, nthread = n_threads)
+  pred_mat <- predict(model, xgb.DMatrix(x_mat))
+  pred_df <- predict(model, x_df)
   expect_equal(pred_mat, pred_df)
 })
@@ -737,7 +747,7 @@ test_that("Coefficients from gblinear have the expected shape and names", {
   expect_equal(nrow(coefs), ncol(x) + 1)
   expect_equal(ncol(coefs), 3)
   expect_equal(row.names(coefs), c("(Intercept)", colnames(x)))
-  pred_auto <- predict(model, x, outputmargin = TRUE, reshape = TRUE)
+  pred_auto <- predict(model, x, outputmargin = TRUE)
   pred_manual <- unname(mm %*% coefs)
   expect_equal(pred_manual, pred_auto, tolerance = 1e-7)
 })

View File

@@ -9,7 +9,7 @@ model <- xgb.train(
   data = dm,
   nrounds = 20
 )
-pred <- predict(model, dm, predleaf = TRUE, reshape = TRUE)
+pred <- predict(model, dm, predleaf = TRUE)
 
 test_that("Slicing full model", {
   new_model <- xgb.slice.Booster(model, 1, 0)
@@ -24,32 +24,32 @@ test_that("Slicing full model", {
 
 test_that("Slicing sequence from start", {
   new_model <- xgb.slice.Booster(model, 1, 10)
-  new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
+  new_pred <- predict(new_model, dm, predleaf = TRUE)
   expect_equal(new_pred, pred[, seq(1, 10)])
 
   new_model <- model[1:10]
-  new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
+  new_pred <- predict(new_model, dm, predleaf = TRUE)
   expect_equal(new_pred, pred[, seq(1, 10)])
 })
 
 test_that("Slicing sequence from middle", {
   new_model <- xgb.slice.Booster(model, 5, 10)
-  new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
+  new_pred <- predict(new_model, dm, predleaf = TRUE)
   expect_equal(new_pred, pred[, seq(5, 10)])
 
   new_model <- model[5:10]
-  new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
+  new_pred <- predict(new_model, dm, predleaf = TRUE)
   expect_equal(new_pred, pred[, seq(5, 10)])
 })
 
 test_that("Slicing with non-unit step", {
   for (s in 2:5) {
     new_model <- xgb.slice.Booster(model, 1, 17, s)
-    new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
+    new_pred <- predict(new_model, dm, predleaf = TRUE)
     expect_equal(new_pred, pred[, seq(1, 17, s)])
 
     new_model <- model[seq(1, 17, s)]
-    new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
+    new_pred <- predict(new_model, dm, predleaf = TRUE)
     expect_equal(new_pred, pred[, seq(1, 17, s)])
   }
 })
@@ -57,11 +57,11 @@ test_that("Slicing with non-unit step", {
 test_that("Slicing with non-unit step from middle", {
   for (s in 2:5) {
     new_model <- xgb.slice.Booster(model, 4, 17, s)
-    new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
+    new_pred <- predict(new_model, dm, predleaf = TRUE)
    expect_equal(new_pred, pred[, seq(4, 17, s)])
 
     new_model <- model[seq(4, 17, s)]
-    new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
+    new_pred <- predict(new_model, dm, predleaf = TRUE)
     expect_equal(new_pred, pred[, seq(4, 17, s)])
   }
 })

View File

@@ -400,12 +400,10 @@ test_that("xgb.DMatrix: can take multi-dimensional 'base_margin'", {
     ),
     nround = 1
   )
-  pred_only_x <- predict(model, x, nthread = n_threads, reshape = TRUE)
+  pred_only_x <- predict(model, x)
   pred_w_base <- predict(
     model,
-    xgb.DMatrix(data = x, base_margin = b, nthread = n_threads),
-    nthread = n_threads,
-    reshape = TRUE
+    xgb.DMatrix(data = x, base_margin = b)
   )
   expect_equal(pred_only_x, pred_w_base - b, tolerance = 1e-5)
 })

View File

@@ -132,31 +132,31 @@ test_that("predict feature contributions works", {
                tolerance = float_tolerance)
 
   # gbtree multiclass
-  pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
+  pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE)
   pred_contr <- predict(mbst.Tree, as.matrix(iris[, -5]), predcontrib = TRUE)
-  expect_is(pred_contr, "list")
-  expect_length(pred_contr, 3)
-  for (g in seq_along(pred_contr)) {
-    expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "(Intercept)"))
-    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 1e-5)
+  expect_is(pred_contr, "array")
+  expect_length(dim(pred_contr), 3)
+  for (g in seq_len(dim(pred_contr)[2])) {
+    expect_equal(colnames(pred_contr[, g, ]), c(colnames(iris[, -5]), "(Intercept)"))
+    expect_lt(max(abs(rowSums(pred_contr[, g, ]) - pred[, g])), 1e-5)
   }
 
   # gblinear multiclass (set base_score = 0, which is base margin in multiclass)
-  pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
+  pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE)
   pred_contr <- predict(mbst.GLM, as.matrix(iris[, -5]), predcontrib = TRUE)
-  expect_length(pred_contr, 3)
+  expect_length(dim(pred_contr), 3)
   coefs_all <- matrix(
     data = as.numeric(xgb.dump(mbst.GLM)[-c(1, 2, 6)]),
     ncol = 3,
     byrow = TRUE
   )
-  for (g in seq_along(pred_contr)) {
-    expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "(Intercept)"))
-    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
+  for (g in seq_len(dim(pred_contr)[2])) {
+    expect_equal(colnames(pred_contr[, g, ]), c(colnames(iris[, -5]), "(Intercept)"))
+    expect_lt(max(abs(rowSums(pred_contr[, g, ]) - pred[, g])), float_tolerance)
     # manual calculation of linear terms
     coefs <- c(coefs_all[-1, g], coefs_all[1, g])  # intercept needs to be the last
     pred_contr_manual <- sweep(as.matrix(cbind(iris[, -5], 1)), 2, coefs, FUN = "*")
-    expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual),
+    expect_equal(as.numeric(pred_contr[, g, ]), as.numeric(pred_contr_manual),
                  tolerance = float_tolerance)
   }
 })

View File

@@ -127,41 +127,23 @@ test_that("multiclass feature interactions work", {
     eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3, nthread = n_threads
   )
   b <- xgb.train(param, dm, 40)
-  pred <- t(
-    array(
-      data = predict(b, dm, outputmargin = TRUE),
-      dim = c(3, 150)
-    )
-  )
+  pred <- predict(b, dm, outputmargin = TRUE)
 
   # SHAP contributions:
   cont <- predict(b, dm, predcontrib = TRUE)
-  expect_length(cont, 3)
-  # rewrap them as a 3d array
-  cont <- array(
-    data = unlist(cont),
-    dim = c(150, 5, 3)
-  )
+  expect_length(dim(cont), 3)
   # make sure for each row they add up to marginal predictions
-  expect_lt(max(abs(apply(cont, c(1, 3), sum) - pred)), 0.001)
+  expect_lt(max(abs(apply(cont, c(1, 2), sum) - pred)), 0.001)
 
   # SHAP interaction contributions:
   intr <- predict(b, dm, predinteraction = TRUE)
-  expect_length(intr, 3)
-  # rewrap them as a 4d array
-  intr <- aperm(
-    a = array(
-      data = unlist(intr),
-      dim = c(150, 5, 5, 3)
-    ),
-    perm = c(4, 1, 2, 3)  # [grp, row, col, col]
-  )
+  expect_length(dim(intr), 4)
   # check the symmetry
   expect_lt(max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)), 0.00001)
   # sums WRT columns must be close to feature contributions
-  expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))), 0.00001)
+  expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - cont)), 0.00001)
 })