diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 39734ab09..a00678a2f 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -853,8 +853,7 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) { pr <- predict( fd$bst, fd$evals[[2L]], - outputmargin = env$outputmargin, - reshape = TRUE + outputmargin = env$outputmargin ) if (is.null(pred)) { if (NCOL(pr) > 1L) { diff --git a/R-package/R/utils.R b/R-package/R/utils.R index d08a411e2..8b87468a4 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -199,8 +199,7 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) { bst, dtrain, outputmargin = TRUE, - training = TRUE, - reshape = TRUE + training = TRUE ) gpair <- obj(pred, dtrain) n_samples <- dim(dtrain)[1] @@ -246,7 +245,7 @@ xgb.iter.eval <- function(bst, evals, iter, feval) { res <- sapply(seq_along(evals), function(j) { w <- evals[[j]] ## predict using all trees - preds <- predict(bst, w, outputmargin = TRUE, reshape = TRUE, iterationrange = "all") + preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all") eval_res <- feval(preds, w) out <- eval_res$value names(out) <- paste0(evnames[j], "-", eval_res$metric) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index cfea11ae3..4b2ba60c3 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -112,9 +112,6 @@ xgb.get.handle <- function(object) { #' @param predcontrib Whether to return feature contributions to individual predictions (see Details). #' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details). #' @param predinteraction Whether to return contributions of feature interactions to individual predictions (see Details). -#' @param reshape Whether to reshape the vector of predictions to matrix form when there are several -#' prediction outputs per case. No effect if `predleaf`, `predcontrib`, -#' or `predinteraction` is `TRUE`. #' @param training Whether the prediction result is used for training. For dart booster, #' training predicting will perform dropout. #' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing @@ -128,8 +125,24 @@ xgb.get.handle <- function(object) { #' of the iterations (rounds) otherwise. #' #' If passing "all", will use all of the rounds regardless of whether the model had early stopping or not. -#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output -#' type and shape of predictions are invariant to the model type. +#' @param strict_shape Whether to always return an array with the same dimensions for the given prediction mode +#' regardless of the model type - meaning that, for example, both a multi-class and a binary classification +#' model would generate output arrays with the same number of dimensions, with the 'class' dimension having +#' size equal to '1' for the binary model. +#' +#' If passing `FALSE` (the default), dimensions will be simplified according to the model type, so that a +#' binary classification model for example would not have a redundant dimension for 'class'. +#' +#' See documentation for the return type for the exact shape of the output arrays for each prediction mode. +#' @param avoid_transpose Whether to output the resulting predictions in the same memory layout in which they +#' are generated by the core XGBoost library, without transposing them to match the expected output shape. 
+#' +#' Internally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major +#' order, hence the result needs to be transposed in order to have the expected shape when represented as +#' an R array or matrix, which might be a slow operation. +#' +#' If passing `TRUE`, then the result will have dimensions in reverse order - for example, rows +#' will be the last dimension instead of the first one. #' @param base_margin Base margin used for boosting from existing model. #' #' Note that, if `newdata` is an `xgb.DMatrix` object, this argument will @@ -180,28 +193,46 @@ xgb.get.handle <- function(object) { #' Note that converting a matrix to [xgb.DMatrix()] uses multiple threads too. #' #' @return -#' The return type depends on `strict_shape`. If `FALSE` (default): -#' - For regression or binary classification: A vector of length `nrows(newdata)`. -#' - For multiclass classification: A vector of length `num_class * nrows(newdata)` or -#' a `(nrows(newdata), num_class)` matrix, depending on the `reshape` value. -#' - When `predleaf = TRUE`: A matrix with one column per tree. -#' - When `predcontrib = TRUE`: When not multiclass, a matrix with -#' ` num_features + 1` columns. The last "+ 1" column corresponds to the baseline value. -#' In the multiclass case, a list of `num_class` such matrices. -#' The contribution values are on the scale of untransformed margin -#' (e.g., for binary classification, the values are log-odds deviations from the baseline). -#' - When `predinteraction = TRUE`: When not multiclass, the output is a 3d array of -#' dimension `c(nrow, num_features + 1, num_features + 1)`. The off-diagonal (in the last two dimensions) -#' elements represent different feature interaction contributions. The array is symmetric WRT the last -#' two dimensions. The "+ 1" columns corresponds to the baselines. Summing this array along the last dimension should -#' produce practically the same result as `predcontrib = TRUE`. -#' In the multiclass case, a list of `num_class` such arrays. +#' A numeric vector or array, with corresponding dimensions depending on the prediction mode and on +#' parameter `strict_shape` as follows: #' -#' When `strict_shape = TRUE`, the output is always an array: -#' - For normal predictions, the output has dimension `(num_class, nrow(newdata))`. -#' - For `predcontrib = TRUE`, the dimension is `(ncol(newdata) + 1, num_class, nrow(newdata))`. -#' - For `predinteraction = TRUE`, the dimension is `(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))`. -#' - For `predleaf = TRUE`, the dimension is `(n_trees_in_forest, num_class, n_iterations, nrow(newdata))`. +#' If passing `strict_shape=FALSE`:\itemize{ +#' \item For regression or binary classification: a vector of length `nrows`. +#' \item For multi-class and multi-target objectives: a matrix of dimensions `[nrows, ngroups]`. +#' +#' Note that objective variant `multi:softmax` defaults to predicting the most likely class (a vector +#' of length `nrows`) instead of per-class probabilities. +#' \item For `predleaf`: a matrix with one column per tree. +#' +#' For multi-class / multi-target, the columns will be arranged so that the leaves from one group +#' are followed by the leaves of the next group (e.g. the column order will be `group1:tree1`, +#' `group1:tree2`, ..., `group2:tree1`, `group2:tree2`, ...). +#' \item For `predcontrib`: when not multi-class / multi-target, a matrix with dimensions +#' `[nrows, nfeats+1]`.
The last "+ 1" column corresponds to the baseline value. +#' +#' For multi-class and multi-target objectives, will be an array with dimensions `[nrows, ngroups, nfeats+1]`. +#' +#' The contribution values are on the scale of untransformed margin (e.g., for binary classification, +#' the values are log-odds deviations from the baseline). +#' \item For `predinteraction`: when not multi-class / multi-target, the output is a 3D array of +#' dimensions `[nrows, nfeats+1, nfeats+1]`. The off-diagonal (in the last two dimensions) +#' elements represent different feature interaction contributions. The array is symmetric w.r.t. the last +#' two dimensions. The "+ 1" column corresponds to the baseline. Summing this array along the last +#' dimension should produce practically the same result as `predcontrib = TRUE`. +#' +#' For multi-class and multi-target, will be a 4D array with dimensions `[nrows, ngroups, nfeats+1, nfeats+1]`. +#' } +#' +#' If passing `strict_shape=TRUE`, the result is always an array:\itemize{ +#' \item For normal predictions, the dimension is `[nrows, ngroups]`. +#' \item For `predcontrib=TRUE`, the dimension is `[nrows, ngroups, nfeats+1]`. +#' \item For `predinteraction=TRUE`, the dimension is `[nrows, ngroups, nfeats+1, nfeats+1]`. +#' \item For `predleaf=TRUE`, the dimension is `[nrows, niter, ngroups, num_parallel_tree]`. +#' } +#' +#' If passing `avoid_transpose=TRUE`, then the dimensions in all cases will be in reverse order - for +#' example, for `predinteraction`, they will be `[nfeats+1, nfeats+1, ngroups, nrows]` +#' instead of `[nrows, ngroups, nfeats+1, nfeats+1]`. #' @seealso [xgb.train()] #' @references #' 1. Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", @@ -279,8 +310,6 @@ xgb.get.handle <- function(object) { #' # predict for softmax returns num_class probability numbers per case: #' pred <- predict(bst, as.matrix(iris[, -5])) #' str(pred) -#' # reshape it to a num_class-columns matrix -#' pred <- matrix(pred, ncol = num_class, byrow = TRUE) #' # convert the probabilities to softmax labels #' pred_labels <- max.col(pred) - 1 #' # the following should result in the same error as seen in the last iteration @@ -311,8 +340,11 @@ xgb.get.handle <- function(object) { #' @export predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE, - reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, + training = FALSE, iterationrange = NULL, strict_shape = FALSE, avoid_transpose = FALSE, validate_features = FALSE, base_margin = NULL, ...)
{ + if (NROW(list(...))) { + warning("Passed unused prediction arguments: ", paste(names(list(...)), collapse = ", "), ".") + } if (validate_features) { newdata <- validate.features(object, newdata) } @@ -415,10 +447,9 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA return(val) } - ## We set strict_shape to TRUE then drop the dimensions conditionally args <- list( training = box(training), - strict_shape = box(TRUE), + strict_shape = as.logical(strict_shape), iteration_begin = box(as.integer(iterationrange[1])), iteration_end = box(as.integer(iterationrange[2])), type = box(as.integer(0)) @@ -445,96 +476,36 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA json_conf <- jsonlite::toJSON(args, auto_unbox = TRUE) if (is_dmatrix) { - predts <- .Call( + arr <- .Call( XGBoosterPredictFromDMatrix_R, xgb.get.handle(object), newdata, json_conf ) } else if (use_as_dense_matrix) { - predts <- .Call( + arr <- .Call( XGBoosterPredictFromDense_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin ) } else if (use_as_csr_matrix) { - predts <- .Call( + arr <- .Call( XGBoosterPredictFromCSR_R, xgb.get.handle(object), csr_data, missing, json_conf, base_margin ) } else if (use_as_df) { - predts <- .Call( + arr <- .Call( XGBoosterPredictFromColumnar_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin ) } - names(predts) <- c("shape", "results") - shape <- predts$shape - arr <- predts$results - - n_ret <- length(arr) - if (n_row != shape[1]) { - stop("Incorrect predict shape.") - } - - .Call(XGSetArrayDimInplace_R, arr, rev(shape)) - - cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "(Intercept)") else NULL - n_groups <- shape[2] - ## Needed regardless of whether strict shape is being used. - if (predcontrib) { - .Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, NULL, NULL)) - } else if (predinteraction) { - .Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, cnames, NULL, NULL)) - } - if (strict_shape) { - return(arr) # strict shape is calculated by libxgboost uniformly. 
+ if ((predcontrib || predinteraction) && !is.null(colnames(newdata))) { + cnames <- c(colnames(newdata), "(Intercept)") + dim_names <- vector(mode = "list", length = length(dim(arr))) + dim_names[[1L]] <- cnames + if (predinteraction) dim_names[[2L]] <- cnames + .Call(XGSetArrayDimNamesInplace_R, arr, dim_names) } - if (predleaf) { - ## Predict leaf - if (n_ret == n_row) { - .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L)) - } else { - arr <- matrix(arr, nrow = n_row, byrow = TRUE) - } - } else if (predcontrib) { - ## Predict contribution - arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col] - if (n_ret == n_row) { - .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L)) - .Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames)) - } else if (n_groups != 1) { - ## turns array into list of matrices - arr <- lapply(seq_len(n_groups), function(g) arr[g, , ]) - } else { - ## remove the first axis (group) - newdim <- dim(arr)[2:3] - newdn <- dimnames(arr)[2:3] - arr <- arr[1, , ] - .Call(XGSetArrayDimInplace_R, arr, newdim) - .Call(XGSetArrayDimNamesInplace_R, arr, newdn) - } - } else if (predinteraction) { - ## Predict interaction - arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col] - if (n_ret == n_row) { - .Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L)) - .Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames)) - } else if (n_groups != 1) { - ## turns array into list of matrices - arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ]) - } else { - ## remove the first axis (group) - arr <- arr[1, , , , drop = FALSE] - newdim <- dim(arr)[2:4] - newdn <- dimnames(arr)[2:4] - .Call(XGSetArrayDimInplace_R, arr, newdim) - .Call(XGSetArrayDimNamesInplace_R, arr, newdn) - } - } else { - ## Normal prediction - if (reshape && n_groups != 1) { - arr <- matrix(arr, ncol = n_groups, byrow = TRUE) - } else { - .Call(XGSetArrayDimInplace_R, arr, NULL) - } + if (!avoid_transpose && is.array(arr)) { + arr <- aperm(arr) } + return(arr) } diff --git a/R-package/R/xgb.plot.shap.R b/R-package/R/xgb.plot.shap.R index be3f71160..20e8f3f43 100644 --- a/R-package/R/xgb.plot.shap.R +++ b/R-package/R/xgb.plot.shap.R @@ -294,8 +294,10 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1, if (is.null(features) && (is.null(model) || !inherits(model, "xgb.Booster"))) stop("when features are not provided, one must provide an xgb.Booster model to rank the features") + last_dim <- function(v) dim(v)[length(dim(v))] + if (!is.null(shap_contrib) && - (!is.matrix(shap_contrib) || nrow(shap_contrib) != nrow(data) || ncol(shap_contrib) != ncol(data) + 1)) + (!is.array(shap_contrib) || nrow(shap_contrib) != nrow(data) || last_dim(shap_contrib) != ncol(data) + 1)) stop("shap_contrib is not compatible with the provided data") if (is.character(features) && is.null(colnames(data))) @@ -318,19 +320,39 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1, colnames(data) <- paste0("X", seq_len(ncol(data))) } - if (!is.null(shap_contrib)) { - if (is.list(shap_contrib)) { # multiclass: either choose a class or merge - shap_contrib <- if (!is.null(target_class)) shap_contrib[[target_class + 1]] else Reduce("+", lapply(shap_contrib, abs)) - } - shap_contrib <- shap_contrib[idx, ] - if (is.null(colnames(shap_contrib))) { - colnames(shap_contrib) <- paste0("X", seq_len(ncol(data))) - } - } else { - shap_contrib <- predict(model, newdata = data, predcontrib = TRUE, approxcontrib = approxcontrib) - if (is.list(shap_contrib)) { # multiclass: either choose 
a class or merge - shap_contrib <- if (!is.null(target_class)) shap_contrib[[target_class + 1]] else Reduce("+", lapply(shap_contrib, abs)) + reshape_3d_shap_contrib <- function(shap_contrib, target_class) { + # multiclass: either choose a class or merge + if (is.list(shap_contrib)) { + if (!is.null(target_class)) { + shap_contrib <- shap_contrib[[target_class + 1]] + } else { + shap_contrib <- Reduce("+", lapply(shap_contrib, abs)) + } + } else if (length(dim(shap_contrib)) > 2) { + if (!is.null(target_class)) { + orig_shape <- dim(shap_contrib) + shap_contrib <- shap_contrib[, target_class + 1, , drop = TRUE] + if (!is.matrix(shap_contrib)) { + shap_contrib <- matrix(shap_contrib, orig_shape[c(1L, 3L)]) + } + } else { + shap_contrib <- apply(abs(shap_contrib), c(1L, 3L), sum) + } } + return(shap_contrib) + } + + if (is.null(shap_contrib)) { + shap_contrib <- predict( + model, + newdata = data, + predcontrib = TRUE, + approxcontrib = approxcontrib + ) + } + shap_contrib <- reshape_3d_shap_contrib(shap_contrib, target_class) + if (is.null(colnames(shap_contrib))) { + colnames(shap_contrib) <- paste0("X", seq_len(ncol(data))) } if (is.null(features)) { diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd index 9c2e434d0..b7a2effee 100644 --- a/R-package/man/predict.xgb.Booster.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -13,10 +13,10 @@ predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE, - reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, + avoid_transpose = FALSE, validate_features = FALSE, base_margin = NULL, ... @@ -66,10 +66,6 @@ logistic regression would return log-odds instead of probabilities.} \item{predinteraction}{Whether to return contributions of feature interactions to individual predictions (see Details).} -\item{reshape}{Whether to reshape the vector of predictions to matrix form when there are several -prediction outputs per case. No effect if \code{predleaf}, \code{predcontrib}, -or \code{predinteraction} is \code{TRUE}.} - \item{training}{Whether the prediction result is used for training. For dart booster, training predicting will perform dropout.} @@ -86,8 +82,27 @@ base-1 indexing, and inclusive of both ends). If passing "all", will use all of the rounds regardless of whether the model had early stopping or not. }\if{html}{\out{</div>}}} -\item{strict_shape}{Default is \code{FALSE}. When set to \code{TRUE}, the output -type and shape of predictions are invariant to the model type.} +\item{strict_shape}{Whether to always return an array with the same dimensions for the given prediction mode +regardless of the model type - meaning that, for example, both a multi-class and a binary classification +model would generate output arrays with the same number of dimensions, with the 'class' dimension having +size equal to '1' for the binary model. + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{ If passing `FALSE` (the default), dimensions will be simplified according to the model type, so that a + binary classification model for example would not have a redundant dimension for 'class'. + + See documentation for the return type for the exact shape of the output arrays for each prediction mode. +}\if{html}{\out{</div>
}}} + +\item{avoid_transpose}{Whether to output the resulting predictions in the same memory layout in which they +are generated by the core XGBoost library, without transposing them to match the expected output shape. + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{ Internally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major + order, hence the result needs to be transposed in order to have the expected shape when represented as + an R array or matrix, which might be a slow operation. + + If passing `TRUE`, then the result will have dimensions in reverse order - for example, rows + will be the last dimension instead of the first one. +}\if{html}{\out{</div>
}}} \item{validate_features}{When \code{TRUE}, validate that the Booster's and newdata's feature_names match (only applicable when both \code{object} and \code{newdata} have feature names). @@ -116,32 +131,46 @@ match (only applicable when both \code{object} and \code{newdata} have feature n \item{...}{Not used.} } \value{ -The return type depends on \code{strict_shape}. If \code{FALSE} (default): -\itemize{ -\item For regression or binary classification: A vector of length \code{nrows(newdata)}. -\item For multiclass classification: A vector of length \code{num_class * nrows(newdata)} or -a \verb{(nrows(newdata), num_class)} matrix, depending on the \code{reshape} value. -\item When \code{predleaf = TRUE}: A matrix with one column per tree. -\item When \code{predcontrib = TRUE}: When not multiclass, a matrix with -\code{ num_features + 1} columns. The last "+ 1" column corresponds to the baseline value. -In the multiclass case, a list of \code{num_class} such matrices. -The contribution values are on the scale of untransformed margin -(e.g., for binary classification, the values are log-odds deviations from the baseline). -\item When \code{predinteraction = TRUE}: When not multiclass, the output is a 3d array of -dimension \code{c(nrow, num_features + 1, num_features + 1)}. The off-diagonal (in the last two dimensions) -elements represent different feature interaction contributions. The array is symmetric WRT the last -two dimensions. The "+ 1" columns corresponds to the baselines. Summing this array along the last dimension should -produce practically the same result as \code{predcontrib = TRUE}. -In the multiclass case, a list of \code{num_class} such arrays. +A numeric vector or array, with corresponding dimensions depending on the prediction mode and on +parameter \code{strict_shape} as follows: + +If passing \code{strict_shape=FALSE}:\itemize{ +\item For regression or binary classification: a vector of length \code{nrows}. +\item For multi-class and multi-target objectives: a matrix of dimensions \verb{[nrows, ngroups]}. + +Note that objective variant \code{multi:softmax} defaults to predicting the most likely class (a vector +of length \code{nrows}) instead of per-class probabilities. +\item For \code{predleaf}: a matrix with one column per tree. + +For multi-class / multi-target, the columns will be arranged so that the leaves from one group +are followed by the leaves of the next group (e.g. the column order will be \code{group1:tree1}, +\code{group1:tree2}, ..., \code{group2:tree1}, \code{group2:tree2}, ...). +\item For \code{predcontrib}: when not multi-class / multi-target, a matrix with dimensions +\verb{[nrows, nfeats+1]}. The last "+ 1" column corresponds to the baseline value. + +For multi-class and multi-target objectives, will be an array with dimensions \verb{[nrows, ngroups, nfeats+1]}. + +The contribution values are on the scale of untransformed margin (e.g., for binary classification, +the values are log-odds deviations from the baseline). +\item For \code{predinteraction}: when not multi-class / multi-target, the output is a 3D array of +dimensions \verb{[nrows, nfeats+1, nfeats+1]}. The off-diagonal (in the last two dimensions) +elements represent different feature interaction contributions. The array is symmetric w.r.t. the last +two dimensions. The "+ 1" column corresponds to the baseline. Summing this array along the last +dimension should produce practically the same result as \code{predcontrib = TRUE}.
+ +For multi-class and multi-target, will be a 4D array with dimensions \verb{[nrows, ngroups, nfeats+1, nfeats+1]}. } -When \code{strict_shape = TRUE}, the output is always an array: -\itemize{ -\item For normal predictions, the output has dimension \verb{(num_class, nrow(newdata))}. -\item For \code{predcontrib = TRUE}, the dimension is \verb{(ncol(newdata) + 1, num_class, nrow(newdata))}. -\item For \code{predinteraction = TRUE}, the dimension is \verb{(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))}. -\item For \code{predleaf = TRUE}, the dimension is \verb{(n_trees_in_forest, num_class, n_iterations, nrow(newdata))}. +If passing \code{strict_shape=TRUE}, the result is always an array:\itemize{ +\item For normal predictions, the dimension is \verb{[nrows, ngroups]}. +\item For \code{predcontrib=TRUE}, the dimension is \verb{[nrows, ngroups, nfeats+1]}. +\item For \code{predinteraction=TRUE}, the dimension is \verb{[nrows, ngroups, nfeats+1, nfeats+1]}. +\item For \code{predleaf=TRUE}, the dimension is \verb{[nrows, niter, ngroups, num_parallel_tree]}. } + +If passing \code{avoid_transpose=TRUE}, then the dimensions in all cases will be in reverse order - for +example, for \code{predinteraction}, they will be \verb{[nfeats+1, nfeats+1, ngroups, nrows]} +instead of \verb{[nrows, ngroups, nfeats+1, nfeats+1]}. } \description{ Predict values on data based on xgboost model. } @@ -241,8 +270,6 @@ bst <- xgb.train( # predict for softmax returns num_class probability numbers per case: pred <- predict(bst, as.matrix(iris[, -5])) str(pred) -# reshape it to a num_class-columns matrix -pred <- matrix(pred, ncol = num_class, byrow = TRUE) # convert the probabilities to softmax labels pred_labels <- max.col(pred) - 1 # the following should result in the same error as seen in the last iteration diff --git a/R-package/src/init.c b/R-package/src/init.c index 5db3218b4..16c1d3b14 100644 --- a/R-package/src/init.c +++ b/R-package/src/init.c @@ -45,7 +45,6 @@ extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP); extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP); extern SEXP XGBoosterUpdateOneIter_R(SEXP, SEXP, SEXP); extern SEXP XGCheckNullPtr_R(SEXP); -extern SEXP XGSetArrayDimInplace_R(SEXP, SEXP); extern SEXP XGSetArrayDimNamesInplace_R(SEXP, SEXP); extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); @@ -108,7 +107,6 @@ static const R_CallMethodDef CallEntries[] = { {"XGBoosterSetParam_R", (DL_FUNC) &XGBoosterSetParam_R, 3}, {"XGBoosterUpdateOneIter_R", (DL_FUNC) &XGBoosterUpdateOneIter_R, 3}, {"XGCheckNullPtr_R", (DL_FUNC) &XGCheckNullPtr_R, 1}, -{"XGSetArrayDimInplace_R", (DL_FUNC) &XGSetArrayDimInplace_R, 2}, {"XGSetArrayDimNamesInplace_R", (DL_FUNC) &XGSetArrayDimNamesInplace_R, 2}, {"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 6}, {"XGDMatrixCreateFromCSR_R", (DL_FUNC) &XGDMatrixCreateFromCSR_R, 6}, diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index 989d3bc0d..5faae8a9f 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -330,11 +330,6 @@ XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle) { return Rf_ScalarLogical(R_ExternalPtrAddr(handle) == nullptr); } -XGB_DLL SEXP XGSetArrayDimInplace_R(SEXP arr, SEXP dims) { - Rf_setAttrib(arr, R_DimSymbol, dims); - return R_NilValue; -} - XGB_DLL SEXP XGSetArrayDimNamesInplace_R(SEXP arr, SEXP dim_names) { Rf_setAttrib(arr, R_DimNamesSymbol, dim_names); return R_NilValue; } @@ -1301,12 +1296,9
@@ enum class PredictionInputType {DMatrix, DenseMatrix, CSRMatrix, DataFrame}; SEXP XGBoosterPredictGeneric(SEXP handle, SEXP input_data, SEXP json_config, PredictionInputType input_type, SEXP missing, SEXP base_margin) { - SEXP r_out_shape; - SEXP r_out_result; - SEXP r_out = Rf_protect(Rf_allocVector(VECSXP, 2)); - SEXP json_config_ = Rf_protect(Rf_asChar(json_config)); - + SEXP r_out_result = R_NilValue; R_API_BEGIN(); + SEXP json_config_ = Rf_protect(Rf_asChar(json_config)); char const *c_json_config = CHAR(json_config_); bst_ulong out_dim; @@ -1386,23 +1378,24 @@ SEXP XGBoosterPredictGeneric(SEXP handle, SEXP input_data, SEXP json_config, } CHECK_CALL(res_code); - r_out_shape = Rf_protect(Rf_allocVector(INTSXP, out_dim)); + SEXP r_out_shape = Rf_protect(Rf_allocVector(INTSXP, out_dim)); size_t len = 1; int *r_out_shape_ = INTEGER(r_out_shape); for (size_t i = 0; i < out_dim; ++i) { - r_out_shape_[i] = out_shape[i]; + r_out_shape_[out_dim - i - 1] = out_shape[i]; len *= out_shape[i]; } r_out_result = Rf_protect(Rf_allocVector(REALSXP, len)); std::copy(out_result, out_result + len, REAL(r_out_result)); - SET_VECTOR_ELT(r_out, 0, r_out_shape); - SET_VECTOR_ELT(r_out, 1, r_out_result); + if (out_dim > 1) { + Rf_setAttrib(r_out_result, R_DimSymbol, r_out_shape); + } R_API_END(); - Rf_unprotect(4); + Rf_unprotect(3); - return r_out; + return r_out_result; } } // namespace diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h index 7f6554cb6..08f16bac1 100644 --- a/R-package/src/xgboost_R.h +++ b/R-package/src/xgboost_R.h @@ -26,14 +26,6 @@ */ XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle); -/*! - * \brief set the dimensions of an array in-place - * \param arr - * \param dims dimensions to set to the array - * \return NULL value - */ -XGB_DLL SEXP XGSetArrayDimInplace_R(SEXP arr, SEXP dims); - /*! 
* \brief set the names of the dimensions of an array in-place * \param arr diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index bbb8fb323..f0ebd7a1c 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -162,20 +162,20 @@ test_that("train and predict softprob", { pred <- predict(bst, as.matrix(iris[, -5])) expect_length(pred, nrow(iris) * 3) # row sums add up to total probability of 1: - expect_equal(rowSums(matrix(pred, ncol = 3, byrow = TRUE)), rep(1, nrow(iris)), tolerance = 1e-7) + expect_equal(rowSums(pred), rep(1, nrow(iris)), tolerance = 1e-7) # manually calculate error at the last iteration: - mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE) - expect_equal(as.numeric(t(mpred)), pred) + mpred <- predict(bst, as.matrix(iris[, -5])) + expect_equal(mpred, pred) pred_labels <- max.col(mpred) - 1 err <- sum(pred_labels != lb) / length(lb) expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6) # manually calculate error at the 1st iteration: - mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1)) + mpred <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 1)) pred_labels <- max.col(mpred) - 1 err <- sum(pred_labels != lb) / length(lb) expect_equal(attributes(bst)$evaluation_log[1, train_merror], err, tolerance = 5e-6) - mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1)) + mpred1 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 1)) expect_equal(mpred, mpred1) d <- cbind( @@ -190,7 +190,7 @@ test_that("train and predict softprob", { data = dtrain, nrounds = 4, num_class = 10, objective = "multi:softprob" ) - predt <- predict(booster, as.matrix(d), reshape = TRUE, strict_shape = FALSE) + predt <- predict(booster, as.matrix(d), strict_shape = FALSE) expect_equal(ncol(predt), 10) expect_equal(rowSums(predt), rep(1, 100), tolerance = 1e-7) }) @@ -254,13 +254,13 @@ test_that("train and predict RF with softprob", { ) expect_equal(xgb.get.num.boosted.rounds(bst), 15) # predict for all iterations: - pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE) + pred <- predict(bst, as.matrix(iris[, -5])) expect_equal(dim(pred), c(nrow(iris), 3)) pred_labels <- max.col(pred) - 1 err <- sum(pred_labels != lb) / length(lb) expect_equal(attributes(bst)$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6) # predict for 7 iterations and adjust for 4 parallel trees per iteration - pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 7)) + pred <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 7)) err <- sum((max.col(pred) - 1) != lb) / length(lb) expect_equal(attributes(bst)$evaluation_log[7, train_merror], err, tolerance = 5e-6) }) @@ -485,15 +485,25 @@ test_that("strict_shape works", { n_rows <- nrow(X) n_cols <- ncol(X) - expect_equal(dim(predt), c(n_groups, n_rows)) - expect_equal(dim(margin), c(n_groups, n_rows)) - expect_equal(dim(contri), c(n_cols + 1, n_groups, n_rows)) - expect_equal(dim(interact), c(n_cols + 1, n_cols + 1, n_groups, n_rows)) - expect_equal(dim(leaf), c(1, n_groups, n_rounds, n_rows)) + expect_equal(dim(predt), c(n_rows, n_groups)) + expect_equal(dim(margin), c(n_rows, n_groups)) + expect_equal(dim(contri), c(n_rows, n_groups, n_cols + 1)) + expect_equal(dim(interact), c(n_rows, n_groups, n_cols + 1, n_cols + 1)) + expect_equal(dim(leaf), c(n_rows, n_rounds, n_groups, 1)) if (n_groups != 1) { for (g in 
seq_len(n_groups)) { - expect_lt(max(abs(colSums(contri[, g, ]) - margin[g, ])), 1e-5) + expect_lt(max(abs(rowSums(contri[, g, ]) - margin[, g])), 1e-5) + } + + leaf_no_strict <- predict(bst, X, strict_shape = FALSE, predleaf = TRUE) + for (g in seq_len(n_groups)) { + g_mask <- rep(FALSE, n_groups) + g_mask[g] <- TRUE + expect_equal( + leaf[, , g, 1L], + leaf_no_strict[, g_mask] + ) } } } @@ -562,7 +572,7 @@ test_that("Quantile regression accepts multiple quantiles", { ), nrounds = 15 ) - pred <- predict(model, x, reshape = TRUE) + pred <- predict(model, x) expect_equal(dim(pred)[1], nrow(x)) expect_equal(dim(pred)[2], 3) @@ -590,7 +600,7 @@ test_that("Can use multi-output labels with built-in objectives", { data = dm, nrounds = 5 ) - pred <- predict(model, x, reshape = TRUE) + pred <- predict(model, x) expect_equal(pred[, 1], -pred[, 2]) expect_true(cor(y, pred[, 1]) > 0.9) expect_true(cor(y, pred[, 2]) < -0.9) @@ -619,7 +629,7 @@ test_that("Can use multi-output labels with custom objectives", { data = dm, nrounds = 5 ) - pred <- predict(model, x, reshape = TRUE) + pred <- predict(model, x) expect_equal(pred[, 1], -pred[, 2]) expect_true(cor(y, pred[, 1]) > 0.9) expect_true(cor(y, pred[, 2]) < -0.9) @@ -666,8 +676,8 @@ test_that("Can predict on data.frame objects", { nrounds = 5 ) - pred_mat <- predict(model, xgb.DMatrix(x_mat), nthread = n_threads) - pred_df <- predict(model, x_df, nthread = n_threads) + pred_mat <- predict(model, xgb.DMatrix(x_mat)) + pred_df <- predict(model, x_df) expect_equal(pred_mat, pred_df) }) @@ -737,7 +747,7 @@ test_that("Coefficients from gblinear have the expected shape and names", { expect_equal(nrow(coefs), ncol(x) + 1) expect_equal(ncol(coefs), 3) expect_equal(row.names(coefs), c("(Intercept)", colnames(x))) - pred_auto <- predict(model, x, outputmargin = TRUE, reshape = TRUE) + pred_auto <- predict(model, x, outputmargin = TRUE) pred_manual <- unname(mm %*% coefs) expect_equal(pred_manual, pred_auto, tolerance = 1e-7) }) diff --git a/R-package/tests/testthat/test_booster_slicing.R b/R-package/tests/testthat/test_booster_slicing.R index 711ccd8b6..f80968e06 100644 --- a/R-package/tests/testthat/test_booster_slicing.R +++ b/R-package/tests/testthat/test_booster_slicing.R @@ -9,7 +9,7 @@ model <- xgb.train( data = dm, nrounds = 20 ) -pred <- predict(model, dm, predleaf = TRUE, reshape = TRUE) +pred <- predict(model, dm, predleaf = TRUE) test_that("Slicing full model", { new_model <- xgb.slice.Booster(model, 1, 0) @@ -24,32 +24,32 @@ test_that("Slicing full model", { test_that("Slicing sequence from start", { new_model <- xgb.slice.Booster(model, 1, 10) - new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + new_pred <- predict(new_model, dm, predleaf = TRUE) expect_equal(new_pred, pred[, seq(1, 10)]) new_model <- model[1:10] - new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + new_pred <- predict(new_model, dm, predleaf = TRUE) expect_equal(new_pred, pred[, seq(1, 10)]) }) test_that("Slicing sequence from middle", { new_model <- xgb.slice.Booster(model, 5, 10) - new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + new_pred <- predict(new_model, dm, predleaf = TRUE) expect_equal(new_pred, pred[, seq(5, 10)]) new_model <- model[5:10] - new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + new_pred <- predict(new_model, dm, predleaf = TRUE) expect_equal(new_pred, pred[, seq(5, 10)]) }) test_that("Slicing with non-unit step", { for (s in 2:5) { new_model <- xgb.slice.Booster(model, 1, 17, s) - 
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + new_pred <- predict(new_model, dm, predleaf = TRUE) expect_equal(new_pred, pred[, seq(1, 17, s)]) new_model <- model[seq(1, 17, s)] - new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + new_pred <- predict(new_model, dm, predleaf = TRUE) expect_equal(new_pred, pred[, seq(1, 17, s)]) } }) @@ -57,11 +57,11 @@ test_that("Slicing with non-unit step", { test_that("Slicing with non-unit step from middle", { for (s in 2:5) { new_model <- xgb.slice.Booster(model, 4, 17, s) - new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + new_pred <- predict(new_model, dm, predleaf = TRUE) expect_equal(new_pred, pred[, seq(4, 17, s)]) new_model <- model[seq(4, 17, s)] - new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + new_pred <- predict(new_model, dm, predleaf = TRUE) expect_equal(new_pred, pred[, seq(4, 17, s)]) } }) diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R index 548afece3..cca7b88da 100644 --- a/R-package/tests/testthat/test_dmatrix.R +++ b/R-package/tests/testthat/test_dmatrix.R @@ -400,12 +400,10 @@ test_that("xgb.DMatrix: can take multi-dimensional 'base_margin'", { ), nround = 1 ) - pred_only_x <- predict(model, x, nthread = n_threads, reshape = TRUE) + pred_only_x <- predict(model, x) pred_w_base <- predict( model, - xgb.DMatrix(data = x, base_margin = b, nthread = n_threads), - nthread = n_threads, - reshape = TRUE + xgb.DMatrix(data = x, base_margin = b) ) expect_equal(pred_only_x, pred_w_base - b, tolerance = 1e-5) }) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 16831cf38..7724d6bc5 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -132,31 +132,31 @@ test_that("predict feature contributions works", { tolerance = float_tolerance) # gbtree multiclass - pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE) + pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE) pred_contr <- predict(mbst.Tree, as.matrix(iris[, -5]), predcontrib = TRUE) - expect_is(pred_contr, "list") - expect_length(pred_contr, 3) - for (g in seq_along(pred_contr)) { - expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "(Intercept)")) - expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 1e-5) + expect_is(pred_contr, "array") + expect_length(dim(pred_contr), 3) + for (g in seq_len(dim(pred_contr)[2])) { + expect_equal(colnames(pred_contr[, g, ]), c(colnames(iris[, -5]), "(Intercept)")) + expect_lt(max(abs(rowSums(pred_contr[, g, ]) - pred[, g])), 1e-5) } # gblinear multiclass (set base_score = 0, which is base margin in multiclass) - pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE) + pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE) pred_contr <- predict(mbst.GLM, as.matrix(iris[, -5]), predcontrib = TRUE) - expect_length(pred_contr, 3) + expect_length(dim(pred_contr), 3) coefs_all <- matrix( data = as.numeric(xgb.dump(mbst.GLM)[-c(1, 2, 6)]), ncol = 3, byrow = TRUE ) - for (g in seq_along(pred_contr)) { - expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "(Intercept)")) - expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance) + for (g in seq_len(dim(pred_contr)[2])) { + expect_equal(colnames(pred_contr[, g, ]), c(colnames(iris[, -5]), "(Intercept)")) + expect_lt(max(abs(rowSums(pred_contr[,
g, ]) - pred[, g])), float_tolerance) # manual calculation of linear terms coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last pred_contr_manual <- sweep(as.matrix(cbind(iris[, -5], 1)), 2, coefs, FUN = "*") - expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual), + expect_equal(as.numeric(pred_contr[, g, ]), as.numeric(pred_contr_manual), tolerance = float_tolerance) } }) diff --git a/R-package/tests/testthat/test_interactions.R b/R-package/tests/testthat/test_interactions.R index 60cf9d800..1380225c7 100644 --- a/R-package/tests/testthat/test_interactions.R +++ b/R-package/tests/testthat/test_interactions.R @@ -127,41 +127,23 @@ test_that("multiclass feature interactions work", { eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3, nthread = n_threads ) b <- xgb.train(param, dm, 40) - pred <- t( - array( - data = predict(b, dm, outputmargin = TRUE), - dim = c(3, 150) - ) - ) + pred <- predict(b, dm, outputmargin = TRUE) # SHAP contributions: cont <- predict(b, dm, predcontrib = TRUE) - expect_length(cont, 3) - # rewrap them as a 3d array - cont <- array( - data = unlist(cont), - dim = c(150, 5, 3) - ) + expect_length(dim(cont), 3) # make sure for each row they add up to marginal predictions - expect_lt(max(abs(apply(cont, c(1, 3), sum) - pred)), 0.001) + expect_lt(max(abs(apply(cont, c(1, 2), sum) - pred)), 0.001) # SHAP interaction contributions: intr <- predict(b, dm, predinteraction = TRUE) - expect_length(intr, 3) - # rewrap them as a 4d array - intr <- aperm( - a = array( - data = unlist(intr), - dim = c(150, 5, 5, 3) - ), - perm = c(4, 1, 2, 3) # [grp, row, col, col] - ) + expect_length(dim(intr), 4) # check the symmetry expect_lt(max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)), 0.00001) # sums WRT columns must be close to feature contributions - expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))), 0.00001) + expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - cont)), 0.00001) })
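The sketches below illustrate the behavior documented above; they are not part of the patch. The objects `x`, `y`, `bst`, `dtrain`, and `bin` are hypothetical stand-ins built from the `iris` data and the package's `agaricus` demo data, mirroring the setups used in the touched tests. First, the new default multi-class output shape, which replaces the removed `reshape = TRUE` path:

library(xgboost)

x <- as.matrix(iris[, -5])
y <- as.numeric(iris$Species) - 1
bst <- xgb.train(
  params = list(objective = "multi:softprob", num_class = 3, nthread = 1),
  data = xgb.DMatrix(x, label = y, nthread = 1),
  nrounds = 5
)

# multi-class predictions now come back as a [nrows, ngroups] matrix by default
pred <- predict(bst, x)
stopifnot(all(dim(pred) == c(nrow(x), 3)))
stopifnot(max(abs(rowSums(pred) - 1)) < 1e-6)  # per-row probabilities sum to 1
pred_labels <- max.col(pred) - 1               # most likely class, base-0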
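Next, `strict_shape` and `avoid_transpose` (same hypothetical objects; the binary model `bin` is an assumed example, not taken from the patch):

# binary classification: the default result is a plain vector, while
# strict_shape = TRUE keeps the 'class' dimension with size 1
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
bin <- xgb.train(
  params = list(objective = "binary:logistic", nthread = 1),
  data = dtrain,
  nrounds = 3
)
p_vec <- predict(bin, dtrain)
p_arr <- predict(bin, dtrain, strict_shape = TRUE)
stopifnot(is.null(dim(p_vec)))
stopifnot(all(dim(p_arr) == c(nrow(dtrain), 1)))

# avoid_transpose = TRUE returns the raw row-major result, so the
# dimensions come out in reverse order
p_strict <- predict(bst, x, strict_shape = TRUE)
p_raw <- predict(bst, x, strict_shape = TRUE, avoid_transpose = TRUE)
stopifnot(all(dim(p_raw) == rev(dim(p_strict))))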
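Likewise for the contribution modes: multi-class `predcontrib` now returns a single 3D array instead of a list of per-class matrices, so per-class slices can be checked against the margin predictions directly, as the updated tests do (sketch, reusing `bst` and `x` from above):

contr <- predict(bst, x, predcontrib = TRUE)  # [nrows, ngroups, nfeats+1]
marg <- predict(bst, x, outputmargin = TRUE)  # [nrows, ngroups]
stopifnot(length(dim(contr)) == 3L)
# feature contributions plus the "(Intercept)" baseline column sum to the margin
for (g in seq_len(dim(contr)[2])) {
  stopifnot(max(abs(rowSums(contr[, g, ]) - marg[, g])) < 1e-4)
}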
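Finally, a self-contained sketch of why the single `aperm()` in the R wrapper suffices: the core library fills the output buffer in row-major order and the C glue now stores it under the reversed shape, so a full transpose recovers the conventional column-major R layout:

m <- matrix(as.numeric(1:6), nrow = 2)      # the logical [2, 3] result
row_major <- as.numeric(t(m))               # the order in which the core library emits it
arr <- array(row_major, dim = rev(dim(m)))  # reversed shape, as set by the C wrapper
stopifnot(identical(aperm(arr), m))         # aperm() restores the expected R array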