|
|
|
|
@@ -112,9 +112,6 @@ xgb.get.handle <- function(object) {
|
|
|
|
|
#' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
|
|
|
|
|
#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
|
|
|
|
|
#' @param predinteraction Whether to return contributions of feature interactions to individual predictions (see Details).
|
|
|
|
|
#' @param reshape Whether to reshape the vector of predictions to matrix form when there are several
|
|
|
|
|
#' prediction outputs per case. No effect if `predleaf`, `predcontrib`,
|
|
|
|
|
#' or `predinteraction` is `TRUE`.
|
|
|
|
|
#' @param training Whether the prediction result is used for training. For dart booster,
|
|
|
|
|
#' predicting in training mode will perform dropout.
|
|
|
|
|
#' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
|
|
|
|
|
@@ -128,8 +125,24 @@ xgb.get.handle <- function(object) {
|
|
|
|
|
#' of the iterations (rounds) otherwise.
|
|
|
|
|
#'
|
|
|
|
|
#' If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
|
|
|
|
|
#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
|
|
|
|
|
#' type and shape of predictions are invariant to the model type.
|
|
|
|
|
#' @param strict_shape Whether to always return an array with the same dimensions for the given prediction mode
|
|
|
|
|
#' regardless of the model type - meaning that, for example, both a multi-class and a binary classification
|
|
|
|
|
#' model would generate output arrays with the same number of dimensions, with the 'class' dimension having
|
|
|
|
|
#' size equal to '1' for the binary model.
|
|
|
|
|
#'
|
|
|
|
|
#' If passing `FALSE` (the default), dimensions will be simplified according to the model type, so that a
|
|
|
|
|
#' binary classification model for example would not have a redundant dimension for 'class'.
|
|
|
|
|
#'
|
|
|
|
|
#' See documentation for the return type for the exact shape of the output arrays for each prediction mode.
|
|
|
|
|
#' @param avoid_transpose Whether to output the resulting predictions in the same memory layout in which they
|
|
|
|
|
#' are generated by the core XGBoost library, without transposing them to match the expected output shape.
|
|
|
|
|
#'
|
|
|
|
|
#' Internally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major
|
|
|
|
|
#' order, hence the result needs to be transposed in order to have the expected shape when represented as
|
|
|
|
|
#' an R array or matrix, which might be a slow operation.
|
|
|
|
|
#'
|
|
|
|
|
#' If passing `TRUE`, then the result will have dimensions in reverse order - for example, rows
|
|
|
|
|
#' will be the last dimension instead of the first dimension.
|
|
|
|
|
#' @param base_margin Base margin used for boosting from existing model.
|
|
|
|
|
#'
|
|
|
|
|
#' Note that, if `newdata` is an `xgb.DMatrix` object, this argument will
|
|
|
|
|
@@ -180,28 +193,46 @@ xgb.get.handle <- function(object) {
|
|
|
|
|
#' Note that converting a matrix to [xgb.DMatrix()] uses multiple threads too.
|
|
|
|
|
#'
|
|
|
|
|
#' @return
|
|
|
|
|
#' The return type depends on `strict_shape`. If `FALSE` (default):
|
|
|
|
|
#' - For regression or binary classification: A vector of length `nrows(newdata)`.
|
|
|
|
|
#' - For multiclass classification: A vector of length `num_class * nrows(newdata)` or
|
|
|
|
|
#' a `(nrows(newdata), num_class)` matrix, depending on the `reshape` value.
|
|
|
|
|
#' - When `predleaf = TRUE`: A matrix with one column per tree.
|
|
|
|
|
#' - When `predcontrib = TRUE`: When not multiclass, a matrix with
|
|
|
|
|
#' ` num_features + 1` columns. The last "+ 1" column corresponds to the baseline value.
|
|
|
|
|
#' In the multiclass case, a list of `num_class` such matrices.
|
|
|
|
|
#' The contribution values are on the scale of untransformed margin
|
|
|
|
|
#' (e.g., for binary classification, the values are log-odds deviations from the baseline).
|
|
|
|
|
#' - When `predinteraction = TRUE`: When not multiclass, the output is a 3d array of
|
|
|
|
|
#' dimension `c(nrow, num_features + 1, num_features + 1)`. The off-diagonal (in the last two dimensions)
|
|
|
|
|
#' elements represent different feature interaction contributions. The array is symmetric WRT the last
|
|
|
|
|
#' two dimensions. The "+ 1" columns correspond to the baselines. Summing this array along the last dimension should
|
|
|
|
|
#' produce practically the same result as `predcontrib = TRUE`.
|
|
|
|
|
#' In the multiclass case, a list of `num_class` such arrays.
|
|
|
|
|
#' A numeric vector or array, with corresponding dimensions depending on the prediction mode and on
|
|
|
|
|
#' parameter `strict_shape` as follows:
|
|
|
|
|
#'
|
|
|
|
|
#' When `strict_shape = TRUE`, the output is always an array:
|
|
|
|
|
#' - For normal predictions, the output has dimension `(num_class, nrow(newdata))`.
|
|
|
|
|
#' - For `predcontrib = TRUE`, the dimension is `(ncol(newdata) + 1, num_class, nrow(newdata))`.
|
|
|
|
|
#' - For `predinteraction = TRUE`, the dimension is `(ncol(newdata) + 1, ncol(newdata) + 1, num_class, nrow(newdata))`.
|
|
|
|
|
#' - For `predleaf = TRUE`, the dimension is `(n_trees_in_forest, num_class, n_iterations, nrow(newdata))`.
|
|
|
|
|
#' If passing `strict_shape=FALSE`:\itemize{
|
|
|
|
|
#' \item For regression or binary classification: a vector of length `nrows`.
|
|
|
|
|
#' \item For multi-class and multi-target objectives: a matrix of dimensions `[nrows, ngroups]`.
|
|
|
|
|
#'
|
|
|
|
|
#' Note that objective variant `multi:softmax` defaults towards predicting most likely class (a vector
|
|
|
|
|
#' of length `nrows`) instead of per-class probabilities.
|
|
|
|
|
#' \item For `predleaf`: a matrix with one column per tree.
|
|
|
|
|
#'
|
|
|
|
|
#' For multi-class / multi-target, they will be arranged so that columns in the output will have
|
|
|
|
|
#' the leafs from one group followed by leafs of the other group (e.g. order will be `group1:feat1`,
|
|
|
|
|
#' `group1:feat2`, ..., `group2:feat1`, `group2:feat2`, ...).
|
|
|
|
|
#' \item For `predcontrib`: when not multi-class / multi-target, a matrix with dimensions
|
|
|
|
|
#' `[nrows, nfeats+1]`. The last "+ 1" column corresponds to the baseline value.
|
|
|
|
|
#'
|
|
|
|
|
#' For multi-class and multi-target objectives, will be an array with dimensions `[nrows, ngroups, nfeats+1]`.
|
|
|
|
|
#'
|
|
|
|
|
#' The contribution values are on the scale of untransformed margin (e.g., for binary classification,
|
|
|
|
|
#' the values are log-odds deviations from the baseline).
|
|
|
|
|
#' \item For `predinteraction`: when not multi-class / multi-target, the output is a 3D array of
|
|
|
|
|
#' dimensions `[nrows, nfeats+1, nfeats+1]`. The off-diagonal (in the last two dimensions)
|
|
|
|
|
#' elements represent different feature interaction contributions. The array is symmetric w.r.t. the last
|
|
|
|
|
#' two dimensions. The "+ 1" columns correspond to the baselines. Summing this array along the last
|
|
|
|
|
#' dimension should produce practically the same result as `predcontrib = TRUE`.
|
|
|
|
|
#'
|
|
|
|
|
#' For multi-class and multi-target, will be a 4D array with dimensions `[nrows, ngroups, nfeats+1, nfeats+1]`.
|
|
|
|
|
#' }
|
|
|
|
|
#'
|
|
|
|
|
#' If passing `strict_shape=TRUE`, the result is always an array:\itemize{
|
|
|
|
|
#' \item For normal predictions, the dimension is `[nrows, ngroups]`.
|
|
|
|
|
#' \item For `predcontrib=TRUE`, the dimension is `[nrows, ngroups, nfeats+1]`.
|
|
|
|
|
#' \item For `predinteraction=TRUE`, the dimension is `[nrows, ngroups, nfeats+1, nfeats+1]`.
|
|
|
|
|
#' \item For `predleaf=TRUE`, the dimension is `[nrows, niter, ngroups, num_parallel_tree]`.
|
|
|
|
|
#' }
|
|
|
|
|
#'
|
|
|
|
|
#' If passing `avoid_transpose=TRUE`, then the dimensions in all cases will be in reverse order - for
|
|
|
|
|
#' example, for `predinteraction`, they will be `[nfeats+1, nfeats+1, ngroups, nrows]`
|
|
|
|
|
#' instead of `[nrows, ngroups, nfeats+1, nfeats+1]`.
|
|
|
|
|
#' @seealso [xgb.train()]
|
|
|
|
|
#' @references
|
|
|
|
|
#' 1. Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions",
|
|
|
|
|
@@ -279,8 +310,6 @@ xgb.get.handle <- function(object) {
|
|
|
|
|
#' # predict for softmax returns num_class probability numbers per case:
|
|
|
|
|
#' pred <- predict(bst, as.matrix(iris[, -5]))
|
|
|
|
|
#' str(pred)
|
|
|
|
|
#' # reshape it to a num_class-columns matrix
|
|
|
|
|
#' pred <- matrix(pred, ncol = num_class, byrow = TRUE)
|
|
|
|
|
#' # convert the probabilities to softmax labels
|
|
|
|
|
#' pred_labels <- max.col(pred) - 1
|
|
|
|
|
#' # the following should result in the same error as seen in the last iteration
|
|
|
|
|
@@ -311,8 +340,11 @@ xgb.get.handle <- function(object) {
|
|
|
|
|
#' @export
|
|
|
|
|
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
|
|
|
|
|
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
|
|
|
|
|
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE,
|
|
|
|
|
training = FALSE, iterationrange = NULL, strict_shape = FALSE, avoid_transpose = FALSE,
|
|
|
|
|
validate_features = FALSE, base_margin = NULL, ...) {
|
|
|
|
|
if (NROW(list(...))) {
|
|
|
|
|
warning("Passed unused prediction arguments: ", paste(names(list(...)), collapse = ", "), ".")
|
|
|
|
|
}
|
|
|
|
|
if (validate_features) {
|
|
|
|
|
newdata <- validate.features(object, newdata)
|
|
|
|
|
}
|
|
|
|
|
@@ -415,10 +447,9 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
|
|
|
|
return(val)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
## We set strict_shape to TRUE then drop the dimensions conditionally
|
|
|
|
|
args <- list(
|
|
|
|
|
training = box(training),
|
|
|
|
|
strict_shape = box(TRUE),
|
|
|
|
|
strict_shape = as.logical(strict_shape),
|
|
|
|
|
iteration_begin = box(as.integer(iterationrange[1])),
|
|
|
|
|
iteration_end = box(as.integer(iterationrange[2])),
|
|
|
|
|
type = box(as.integer(0))
|
|
|
|
|
@@ -445,96 +476,36 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
|
|
|
|
|
|
|
|
|
json_conf <- jsonlite::toJSON(args, auto_unbox = TRUE)
|
|
|
|
|
if (is_dmatrix) {
|
|
|
|
|
predts <- .Call(
|
|
|
|
|
arr <- .Call(
|
|
|
|
|
XGBoosterPredictFromDMatrix_R, xgb.get.handle(object), newdata, json_conf
|
|
|
|
|
)
|
|
|
|
|
} else if (use_as_dense_matrix) {
|
|
|
|
|
predts <- .Call(
|
|
|
|
|
arr <- .Call(
|
|
|
|
|
XGBoosterPredictFromDense_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
|
|
|
|
|
)
|
|
|
|
|
} else if (use_as_csr_matrix) {
|
|
|
|
|
predts <- .Call(
|
|
|
|
|
arr <- .Call(
|
|
|
|
|
XGBoosterPredictFromCSR_R, xgb.get.handle(object), csr_data, missing, json_conf, base_margin
|
|
|
|
|
)
|
|
|
|
|
} else if (use_as_df) {
|
|
|
|
|
predts <- .Call(
|
|
|
|
|
arr <- .Call(
|
|
|
|
|
XGBoosterPredictFromColumnar_R, xgb.get.handle(object), newdata, missing, json_conf, base_margin
|
|
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
names(predts) <- c("shape", "results")
|
|
|
|
|
shape <- predts$shape
|
|
|
|
|
arr <- predts$results
|
|
|
|
|
|
|
|
|
|
n_ret <- length(arr)
|
|
|
|
|
if (n_row != shape[1]) {
|
|
|
|
|
stop("Incorrect predict shape.")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
.Call(XGSetArrayDimInplace_R, arr, rev(shape))
|
|
|
|
|
|
|
|
|
|
cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "(Intercept)") else NULL
|
|
|
|
|
n_groups <- shape[2]
|
|
|
|
|
|
|
|
|
|
## Needed regardless of whether strict shape is being used.
|
|
|
|
|
if (predcontrib) {
|
|
|
|
|
.Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, NULL, NULL))
|
|
|
|
|
} else if (predinteraction) {
|
|
|
|
|
.Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, cnames, NULL, NULL))
|
|
|
|
|
}
|
|
|
|
|
if (strict_shape) {
|
|
|
|
|
return(arr) # strict shape is calculated by libxgboost uniformly.
|
|
|
|
|
if ((predcontrib || predinteraction) && !is.null(colnames(newdata))) {
|
|
|
|
|
cnames <- c(colnames(newdata), "(Intercept)")
|
|
|
|
|
dim_names <- vector(mode = "list", length = length(dim(arr)))
|
|
|
|
|
dim_names[[1L]] <- cnames
|
|
|
|
|
if (predinteraction) dim_names[[2L]] <- cnames
|
|
|
|
|
.Call(XGSetArrayDimNamesInplace_R, arr, dim_names)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (predleaf) {
|
|
|
|
|
## Predict leaf
|
|
|
|
|
if (n_ret == n_row) {
|
|
|
|
|
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
|
|
|
|
|
} else {
|
|
|
|
|
arr <- matrix(arr, nrow = n_row, byrow = TRUE)
|
|
|
|
|
}
|
|
|
|
|
} else if (predcontrib) {
|
|
|
|
|
## Predict contribution
|
|
|
|
|
arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
|
|
|
|
|
if (n_ret == n_row) {
|
|
|
|
|
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
|
|
|
|
|
.Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
|
|
|
|
|
} else if (n_groups != 1) {
|
|
|
|
|
## turns array into list of matrices
|
|
|
|
|
arr <- lapply(seq_len(n_groups), function(g) arr[g, , ])
|
|
|
|
|
} else {
|
|
|
|
|
## remove the first axis (group)
|
|
|
|
|
newdim <- dim(arr)[2:3]
|
|
|
|
|
newdn <- dimnames(arr)[2:3]
|
|
|
|
|
arr <- arr[1, , ]
|
|
|
|
|
.Call(XGSetArrayDimInplace_R, arr, newdim)
|
|
|
|
|
.Call(XGSetArrayDimNamesInplace_R, arr, newdn)
|
|
|
|
|
}
|
|
|
|
|
} else if (predinteraction) {
|
|
|
|
|
## Predict interaction
|
|
|
|
|
arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
|
|
|
|
|
if (n_ret == n_row) {
|
|
|
|
|
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
|
|
|
|
|
.Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
|
|
|
|
|
} else if (n_groups != 1) {
|
|
|
|
|
## turns array into list of matrices
|
|
|
|
|
arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ])
|
|
|
|
|
} else {
|
|
|
|
|
## remove the first axis (group)
|
|
|
|
|
arr <- arr[1, , , , drop = FALSE]
|
|
|
|
|
newdim <- dim(arr)[2:4]
|
|
|
|
|
newdn <- dimnames(arr)[2:4]
|
|
|
|
|
.Call(XGSetArrayDimInplace_R, arr, newdim)
|
|
|
|
|
.Call(XGSetArrayDimNamesInplace_R, arr, newdn)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
## Normal prediction
|
|
|
|
|
if (reshape && n_groups != 1) {
|
|
|
|
|
arr <- matrix(arr, ncol = n_groups, byrow = TRUE)
|
|
|
|
|
} else {
|
|
|
|
|
.Call(XGSetArrayDimInplace_R, arr, NULL)
|
|
|
|
|
}
|
|
|
|
|
if (!avoid_transpose && is.array(arr)) {
|
|
|
|
|
arr <- aperm(arr)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return(arr)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|