SHAP values for feature contributions (#2438)
* SHAP values for feature contributions
* Fix commenting error
* New polynomial time SHAP value estimation algorithm
* Update API to support SHAP values
* Fix merge conflicts with updates in master
* Correct submodule hashes
* Fix variable sized stack allocation
* Make lint happy
* Add docs
* Fix typo
* Adjust tolerances
* Remove unneeded def
* Fixed cpp test setup
* Updated R API and cleaned up
* Fixed test typo

This commit is contained in:
parent ff9180cd73
commit 78c4188cec
@@ -39,7 +39,7 @@ xgb.handleToBooster <- function(handle, raw = NULL) {
is.null.handle <- function(handle) {
  if (!identical(class(handle), "xgb.Booster.handle"))
    stop("argument type must be xgb.Booster.handle")

  if (is.null(handle) || .Call(XGCheckNullPtr_R, handle))
    return(TRUE)
  return(FALSE)
@@ -61,49 +61,49 @@ xgb.get.handle <- function(object) {

#' Restore missing parts of an incomplete xgb.Booster object.
#'
#' It attempts to complete an \code{xgb.Booster} object by restoring either its missing
#' raw model memory dump (when it has no \code{raw} data but its \code{xgb.Booster.handle} is valid)
#' or its missing internal handle (when its \code{xgb.Booster.handle} is not valid
#' but it has a raw Booster memory dump).
#'
#' @param object object of class \code{xgb.Booster}
#' @param saveraw a flag indicating whether to append \code{raw} Booster memory dump data
#'        when it doesn't already exist.
#'
#' @details
#'
#' While this method is primarily for internal use, it might be useful in some practical situations.
#'
#' E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object,
#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
#' should still work for such a model object since those methods would be using
#' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
#' \code{xgb.Booster.complete} function explicitly once after loading a model as an R-object.
#' That would prevent further repeated implicit reconstruction of an internal booster model.
#'
#' @return
#' An object of \code{xgb.Booster} class.
#'
#' @examples
#'
#' data(agaricus.train, package='xgboost')
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' saveRDS(bst, "xgb.model.rds")
#'
#' bst1 <- readRDS("xgb.model.rds")
#' # the handle is invalid:
#' print(bst1$handle)
#'
#' bst1 <- xgb.Booster.complete(bst1)
#' # now the handle points to a valid internal booster model:
#' print(bst1$handle)
#'
#' @export
xgb.Booster.complete <- function(object, saveraw = TRUE) {
  if (!inherits(object, "xgb.Booster"))
    stop("argument type must be xgb.Booster")

  if (is.null.handle(object$handle)) {
    object$handle <- xgb.Booster.handle(modelfile = object$raw)
  } else {
@@ -114,88 +114,90 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
}

#' Predict method for eXtreme Gradient Boosting model
#'
#' Predicted values based on either xgboost model or model handle object.
#'
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}
#' @param newdata takes \code{matrix}, \code{dgCMatrix}, local data file or \code{xgb.DMatrix}.
#' @param missing Missing is only used when input is dense matrix. Pick a float value that represents
#'        missing values in data (e.g., sometimes 0 or some other extreme value is used).
#' @param outputmargin whether the prediction should be returned in the form of the original untransformed
#'        sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
#'        logistic regression would result in predictions for log-odds instead of probabilities.
#' @param ntreelimit limit the number of model's trees or boosting iterations used in prediction (see Details).
#'        It will use all the trees by default (\code{NULL} value).
#' @param predleaf whether predict leaf index instead.
#' @param predcontrib whether to return feature contributions to individual predictions instead (see Details).
#' @param approxcontrib whether to use a fast approximation for feature contributions (see Details).
#' @param reshape whether to reshape the vector of predictions to a matrix form when there are several
#'        prediction outputs per case. This option has no effect when \code{predleaf = TRUE}.
#' @param ... Parameters passed to \code{predict.xgb.Booster}
#'
#' @details
#' Note that \code{ntreelimit} is not necessarily equal to the number of boosting iterations
#' and it is not necessarily equal to the number of trees in a model.
#' E.g., in a random forest-like model, \code{ntreelimit} would limit the number of trees.
#' But for multiclass classification, while there are multiple trees per iteration,
#' \code{ntreelimit} limits the number of boosting iterations.
#'
#' Also note that \code{ntreelimit} would currently do nothing for predictions from gblinear,
#' since gblinear doesn't keep its boosting history.
#'
#' One possible practical application of the \code{predleaf} option is to use the model
#' as a generator of new features which capture non-linearity and interactions,
#' e.g., as implemented in \code{\link{xgb.create.features}}.
#'
#' Setting \code{predcontrib = TRUE} allows calculating contributions of each feature to
#' individual predictions. For "gblinear" booster, feature contributions are simply linear terms
#' (feature_beta * feature_value). For "gbtree" booster, feature contribution is calculated
#' as a sum of average contribution of that feature's split nodes across all trees to an
#' individual prediction, following the idea explained in
#' \url{http://blog.datadive.net/interpreting-random-forests/}.
#' (feature_beta * feature_value). For "gbtree" booster, feature contributions are SHAP
#' values (https://arxiv.org/abs/1706.06060) that sum to the difference between the expected output
#' of the model and the current prediction (where the hessian weights are used to compute the expectations).
#' Setting \code{approxcontrib = TRUE} approximates these values following the idea explained
#' in \url{http://blog.datadive.net/interpreting-random-forests/}.
#'
#' @return
#' For regression or binary classification, it returns a vector of length \code{nrows(newdata)}.
#' For multiclass classification, either a \code{num_class * nrows(newdata)} vector or
#' a \code{(nrows(newdata), num_class)} dimension matrix is returned, depending on
#' the \code{reshape} value.
#'
#' When \code{predleaf = TRUE}, the output is a matrix object with the
#' number of columns corresponding to the number of trees.
#'
#' When \code{predcontrib = TRUE} and it is not a multiclass setting, the output is a matrix object with
#' \code{num_features + 1} columns. The last "+ 1" column in a matrix corresponds to bias.
#' For a multiclass case, a list of \code{num_class} elements is returned, where each element is
#' such a matrix. The contribution values are on the scale of untransformed margin
#' (e.g., for binary classification would mean that the contributions are log-odds deviations from bias).
#'
#' @seealso
#' \code{\link{xgb.train}}.
#'
#' @examples
#' ## binary classification:
#'
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#'
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#'                eta = 0.5, nthread = 2, nrounds = 5, objective = "binary:logistic")
#' # use all trees by default
#' pred <- predict(bst, test$data)
#' # use only the 1st tree
#' pred1 <- predict(bst, test$data, ntreelimit = 1)
#'
#' # Predicting tree leafs:
#' # the result is an nsamples X ntrees matrix
#' pred_leaf <- predict(bst, test$data, predleaf = TRUE)
#' str(pred_leaf)
#'
#' # Predicting feature contributions to predictions:
#' # the result is an nsamples X (nfeatures + 1) matrix
#' pred_contr <- predict(bst, test$data, predcontrib = TRUE)
#' str(pred_contr)
#' # verify that contributions' sums are equal to log-odds of predictions (up to float precision):
#' summary(rowSums(pred_contr) - qlogis(pred))
#' # for the 1st record, let's inspect its features that had non-zero contribution to prediction:
#' contr1 <- pred_contr[1,]
@@ -206,10 +208,10 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' par(mar = old_mar + c(0,7,0,0))
#' barplot(contr1, horiz = TRUE, las = 2, xlab = "contribution to prediction in log-odds")
#' par(mar = old_mar)
#'
#'
#' ## multiclass classification in iris dataset:
#'
#' lb <- as.numeric(iris$Species) - 1
#' num_class <- 3
#' set.seed(11)
@@ -225,7 +227,7 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' pred_labels <- max.col(pred) - 1
#' # the following should result in the same error as seen in the last iteration
#' sum(pred_labels != lb)/length(lb)
#'
#' # compare that to the predictions from softmax:
#' set.seed(11)
#' bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
@@ -234,14 +236,14 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' pred <- predict(bst, as.matrix(iris[, -5]))
#' str(pred)
#' all.equal(pred, pred_labels)
#' # prediction from using only 5 iterations should result
#' # in the same error as seen in iteration 5:
#' pred5 <- predict(bst, as.matrix(iris[, -5]), ntreelimit=5)
#' sum(pred5 != lb)/length(lb)
#'
#'
#' ## random forest-like model of 25 trees for binary classification:
#'
#' set.seed(11)
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 5,
#'                nthread = 2, nrounds = 1, objective = "binary:logistic",
@@ -258,7 +260,7 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' @rdname predict.xgb.Booster
#' @export
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
                                predleaf = FALSE, predcontrib = FALSE, reshape = FALSE, ...) {
                                predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, reshape = FALSE, ...) {

  object <- xgb.Booster.complete(object, saveraw = FALSE)
  if (!inherits(newdata, "xgb.DMatrix"))
@@ -269,18 +271,18 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
    ntreelimit <- 0
  if (ntreelimit < 0)
    stop("ntreelimit cannot be negative")

  option <- 0L + 1L * as.logical(outputmargin) + 2L * as.logical(predleaf) + 4L * as.logical(predcontrib)
  option <- 0L + 1L * as.logical(outputmargin) + 2L * as.logical(predleaf) + 4L * as.logical(predcontrib) + 8L * as.logical(approxcontrib)

  ret <- .Call(XGBoosterPredict_R, object$handle, newdata, option[1], as.integer(ntreelimit))

  n_ret <- length(ret)
  n_row <- nrow(newdata)
  npred_per_case <- n_ret / n_row

  if (n_ret %% n_row != 0)
    stop("prediction length ", n_ret, " is not multiple of nrows(newdata) ", n_row)

  if (predleaf) {
    ret <- if (n_ret == n_row) {
      matrix(ret, ncol = 1)
@@ -325,9 +327,9 @@ predict.xgb.Booster.handle <- function(object, ...) {
#'
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}.
#' @param name a non-empty character string specifying which attribute is to be accessed.
#' @param value a value of an attribute for \code{xgb.attr<-}; for \code{xgb.attributes<-}
#'        it's a list (or an object coercible to a list) with the names of attributes to set
#'        and the elements corresponding to attribute values.
#'        Non-character values are converted to character.
#'        When attribute value is not a scalar, only the first index is used.
#'        Use \code{NULL} to remove an attribute.
@@ -336,32 +338,32 @@ predict.xgb.Booster.handle <- function(object, ...) {
#' The primary purpose of xgboost model attributes is to store some meta-data about the model.
#' Note that they are a separate concept from the object attributes in R.
#' Specifically, they refer to key-value strings that can be attached to an xgboost model,
#' stored together with the model's binary representation, and accessed later
#' (from R or any other interface).
#' In contrast, any R-attribute assigned to an R-object of \code{xgb.Booster} class
#' would not be saved by \code{xgb.save} because an xgboost model is an external memory object
#' and its serialization is handled externally.
#' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
#' change the value of that parameter for a model.
#' Use \code{\link{xgb.parameters<-}} to set or change model parameters.
#'
#' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
#' than for \code{xgb.Booster}, since only a handle (pointer) would need to be copied.
#' That would only matter if attributes need to be set many times.
#' Note, however, that when feeding a handle of an \code{xgb.Booster} object to the attribute setters,
#' the raw model cache of an \code{xgb.Booster} object would not be automatically updated,
#' and it would be the user's responsibility to call \code{xgb.save.raw} to update it.
#'
#' The \code{xgb.attributes<-} setter either updates the existing attributes or adds one or several new ones,
#' but it doesn't delete the other existing attributes.
#'
#' @return
#' \code{xgb.attr} returns either a string value of an attribute
#' or \code{NULL} if an attribute wasn't stored in a model.
#'
#' \code{xgb.attributes} returns a list of all attributes stored in a model
#' or \code{NULL} if a model has no stored attributes.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
@@ -377,13 +379,13 @@ predict.xgb.Booster.handle <- function(object, ...) {
#' bst1 <- xgb.load('xgb.model')
#' print(xgb.attr(bst1, "my_attribute"))
#' print(xgb.attributes(bst1))
#'
#' # deletion:
#' xgb.attr(bst1, "my_attribute") <- NULL
#' print(xgb.attributes(bst1))
#' xgb.attributes(bst1) <- list(a = NULL, b = NULL)
#' print(xgb.attributes(bst1))
#'
#' @rdname xgb.attr
#' @export
xgb.attr <- function(object, name) {
@@ -464,7 +466,7 @@ xgb.attributes <- function(object) {
#' @details
#' Note that the setter would usually work more efficiently for \code{xgb.Booster.handle}
#' than for \code{xgb.Booster}, since only a handle would need to be copied.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
@@ -473,7 +475,7 @@ xgb.attributes <- function(object) {
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#'
#' xgb.parameters(bst) <- list(eta = 0.1)
#'
#' @rdname xgb.parameters
#' @export
`xgb.parameters<-` <- function(object, value) {
@@ -503,28 +505,28 @@ xgb.ntree <- function(bst) {

#' Print xgb.Booster
#'
#' Print information about xgb.Booster.
#'
#' @param x an xgb.Booster object
#' @param verbose whether to print detailed data (e.g., attribute values)
#' @param ... not currently used
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' attr(bst, 'myattr') <- 'memo'
#'
#' print(bst)
#' print(bst, verbose=TRUE)
#'
#' @method print xgb.Booster
#' @export
print.xgb.Booster <- function(x, verbose = FALSE, ...) {
  cat('##### xgb.Booster\n')

  valid_handle <- is.null.handle(x$handle)
  if (!valid_handle)
    cat("Handle is invalid! Suggest using xgb.Booster.complete\n")
@@ -539,10 +541,10 @@ print.xgb.Booster <- function(x, verbose = FALSE, ...) {
    cat('call:\n ')
    print(x$call)
  }

  if (!is.null(x$params)) {
    cat('params (as set within xgb.train):\n')
    cat( ' ',
        paste(names(x$params),
              paste0('"', unlist(x$params), '"'),
              sep = ' = ', collapse = ', '), '\n', sep = '')
@@ -562,7 +564,7 @@ print.xgb.Booster <- function(x, verbose = FALSE, ...) {
    cat(' ', paste(names(attrs), collapse = ', '), '\n', sep = '')
    }
  }

  if (!is.null(x$callbacks) && length(x$callbacks) > 0) {
    cat('callbacks:\n')
    lapply(callback.calls(x$callbacks), function(x) {
@@ -570,14 +572,14 @@ print.xgb.Booster <- function(x, verbose = FALSE, ...) {
      print(x)
    })
  }

  if (!is.null(x$feature_names))
    cat('# of features:', length(x$feature_names), '\n')

  cat('niter: ', x$niter, '\n', sep = '')
  # TODO: uncomment when faster xgb.ntree is implemented
  #cat('ntree: ', xgb.ntree(x), '\n', sep='')

  for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks',
                                'evaluation_log','niter','feature_names'))) {
    if (is.atomic(x[[n]])) {
@@ -587,11 +589,11 @@ print.xgb.Booster <- function(x, verbose = FALSE, ...) {
      print(x[[n]])
    }
  }

  if (!is.null(x$evaluation_log)) {
    cat('evaluation_log:\n')
    print(x$evaluation_log, row.names = FALSE, topn = 2)
  }

  invisible(x)
}
@@ -52,9 +52,9 @@ test_that("xgb.dump works", {

test_that("xgb.dump works for gblinear", {
  expect_length(xgb.dump(bst.GLM), 14)
  # also make sure that it works properly for a sparse model where some coefficients
  # are 0 from setting large L1 regularization:
  bst.GLM.sp <- xgboost(data = sparse_matrix, label = label, eta = 1, nthread = 2, nrounds = 1,
                        alpha=2, objective = "binary:logistic", booster = "gblinear")
  d.sp <- xgb.dump(bst.GLM.sp)
  expect_length(d.sp, 14)
@@ -80,28 +80,35 @@ test_that("predict feature contributions works", {
  expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))
  expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "BIAS"))
  pred <- predict(bst.Tree, sparse_matrix, outputmargin = TRUE)
  expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-6)
  expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)

  # gbtree binary classifier (approximate method)
  expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
  expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))
  expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "BIAS"))
  pred <- predict(bst.Tree, sparse_matrix, outputmargin = TRUE)
  expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)

  # gblinear binary classifier
  expect_error(pred_contr <- predict(bst.GLM, sparse_matrix, predcontrib = TRUE), regexp = NA)
  expect_equal(dim(pred_contr), c(nrow(sparse_matrix), ncol(sparse_matrix) + 1))
  expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "BIAS"))
  pred <- predict(bst.GLM, sparse_matrix, outputmargin = TRUE)
  expect_lt(max(abs(rowSums(pred_contr) - pred)), 2e-6)
  expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
  # manual calculation of linear terms
  coefs <- xgb.dump(bst.GLM)[-c(1,2,4)] %>% as.numeric
  coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
  pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN="*")
  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual), 2e-6)
  expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual), 1e-5)

  # gbtree multiclass
  pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
  pred_contr <- predict(mbst.Tree, as.matrix(iris[, -5]), predcontrib = TRUE)
  expect_is(pred_contr, "list")
  expect_length(pred_contr, 3)
  for (g in seq_along(pred_contr)) {
    expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 2e-6)
    expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 1e-5)
  }

  # gblinear multiclass (set base_score = 0, which is base margin in multiclass)
@@ -192,10 +199,10 @@ test_that("xgb.model.dt.tree works with and without feature names", {
  expect_equal(names.dt.trees, names(dt.tree))
  expect_equal(dim(dt.tree), c(188, 10))
  expect_output(str(dt.tree), 'Feature.*\\"Age\\"')

  dt.tree.0 <- xgb.model.dt.tree(model = bst.Tree)
  expect_equal(dt.tree, dt.tree.0)

  # when model contains no feature names:
  bst.Tree.x <- bst.Tree
  bst.Tree.x$feature_names <- NULL
@@ -219,20 +226,20 @@ test_that("xgb.importance works with and without feature names", {
  expect_equal(dim(importance.Tree), c(7, 4))
  expect_equal(colnames(importance.Tree), c("Feature", "Gain", "Cover", "Frequency"))
  expect_output(str(importance.Tree), 'Feature.*\\"Age\\"')

  importance.Tree.0 <- xgb.importance(model = bst.Tree)
  expect_equal(importance.Tree, importance.Tree.0)

  # when model contains no feature names:
  bst.Tree.x <- bst.Tree
  bst.Tree.x$feature_names <- NULL
  importance.Tree.x <- xgb.importance(model = bst.Tree)
  expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE])

  imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
  expect_equal(colnames(imp2plot), c("Feature", "Gain", "Cover", "Frequency", "Importance"))
  xgb.ggplot.importance(importance_matrix = importance.Tree)

  # for multiclass
  imp.Tree <- xgb.importance(model = mbst.Tree)
  expect_equal(dim(imp.Tree), c(4, 4))
@@ -247,7 +254,7 @@ test_that("xgb.importance works with GLM model", {
  imp2plot <- xgb.plot.importance(importance.GLM)
  expect_equal(colnames(imp2plot), c("Feature", "Weight", "Importance"))
  xgb.ggplot.importance(importance.GLM)

  # for multiclass
  imp.GLM <- xgb.importance(model = mbst.GLM)
  expect_equal(dim(imp.GLM), c(12, 3))
@@ -115,10 +115,11 @@ class GradientBooster {
   * \param out_contribs output vector to hold the contributions
   * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
   *    we do not limit number of trees
   * \param approximate use a faster (inconsistent) approximation of SHAP values
   */
  virtual void PredictContribution(DMatrix* dmat,
                                   std::vector<bst_float>* out_contribs,
                                   unsigned ntree_limit = 0) = 0;
                                   unsigned ntree_limit = 0, bool approximate = false) = 0;

  /*!
   * \brief dump the model in the requested format
@@ -104,13 +104,15 @@ class Learner : public rabit::Serializable {
   *  predictor, when it equals 0, this means we are using all the trees
   * \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor
   * \param pred_contribs whether to only predict the feature contributions
   * \param approx_contribs whether to approximate the feature contributions for speed
   */
  virtual void Predict(DMatrix* data,
                       bool output_margin,
                       std::vector<bst_float> *out_preds,
                       unsigned ntree_limit = 0,
                       bool pred_leaf = false,
                       bool pred_contribs = false) const = 0;
                       bool pred_contribs = false,
                       bool approx_contribs = false) const = 0;
  /*!
   * \brief Set additional attribute to the Booster.
   *  The property will be saved along the booster.
@@ -144,12 +144,14 @@ class Predictor {
   * \param [in,out] out_contribs The output feature contribs.
   * \param model Model to make predictions from.
   * \param ntree_limit (Optional) The ntree limit.
   * \param approximate Use fast approximate algorithm.
   */
  virtual void PredictContribution(DMatrix* dmat,
                                   std::vector<bst_float>* out_contribs,
                                   const gbm::GBTreeModel& model,
                                   unsigned ntree_limit = 0) = 0;
                                   unsigned ntree_limit = 0,
                                   bool approximate = false) = 0;

  /**
   * \fn static Predictor* Predictor::Create(std::string name);
@@ -14,6 +14,7 @@
#include <string>
#include <cstring>
#include <algorithm>
#include <tuple>
#include "./base.h"
#include "./data.h"
#include "./logging.h"
@@ -411,6 +412,20 @@ struct RTreeNodeStat {
  int leaf_child_cnt;
};

// Used by TreeShap
// data we keep about our decision path
// note that pweight is included for convenience and is not tied with the other attributes
// the pweight of the i'th path element is the permutation weight of paths with i-1 ones in them
struct PathElement {
  int feature_index;
  bst_float zero_fraction;
  bst_float one_fraction;
  bst_float pweight;
  PathElement() {}
  PathElement(int i, bst_float z, bst_float o, bst_float w) :
    feature_index(i), zero_fraction(z), one_fraction(o), pweight(w) {}
};

/*!
 * \brief define regression tree to be the most common tree model.
 *  This is the data structure used in xgboost's major tree models.
@@ -482,13 +497,26 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
   */
  inline bst_float Predict(const FVec& feat, unsigned root_id = 0) const;
  /*!
   * \brief calculate the feature contributions for the given root
   * \brief calculate the feature contributions (https://arxiv.org/abs/1706.06060) for the tree
   * \param feat dense feature vector, if the feature is missing the field is set to NaN
   * \param root_id starting root index of the instance
   * \param out_contribs output vector to hold the contributions
   */
  inline void CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
                                     bst_float *out_contribs) const;
  inline void TreeShap(const RegTree::FVec& feat, bst_float *phi,
                       unsigned node_index, unsigned unique_depth,
                       PathElement *parent_unique_path, bst_float parent_zero_fraction,
                       bst_float parent_one_fraction, int parent_feature_index) const;

  /*!
   * \brief calculate the approximate feature contributions for the given root
   * \param feat dense feature vector, if the feature is missing the field is set to NaN
   * \param root_id starting root index of the instance
   * \param out_contribs output vector to hold the contributions
   */
  inline void CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
                                           bst_float *out_contribs) const;
  /*!
   * \brief get next position of the tree given current pid
   * \param pid Current node id.
@@ -590,7 +618,7 @@ inline bst_float RegTree::FillNodeMeanValue(int nid) {
  return result;
}

inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
                                                  bst_float *out_contribs) const {
  CHECK_GT(this->node_mean_values.size(), 0U);
  // this follows the idea of http://blog.datadive.net/interpreting-random-forests/
@@ -617,6 +645,154 @@ inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned
    out_contribs[split_index] += leaf_value - node_value;
}

// extend our decision path with a fraction of one and zero extensions
inline void ExtendPath(PathElement *unique_path, unsigned unique_depth,
                       bst_float zero_fraction, bst_float one_fraction, int feature_index) {
  unique_path[unique_depth].feature_index = feature_index;
  unique_path[unique_depth].zero_fraction = zero_fraction;
  unique_path[unique_depth].one_fraction = one_fraction;
  unique_path[unique_depth].pweight = (unique_depth == 0 ? 1 : 0);
  for (int i = unique_depth-1; i >= 0; i--) {
    unique_path[i+1].pweight += one_fraction*unique_path[i].pweight*(i+1)
                                / static_cast<bst_float>(unique_depth+1);
    unique_path[i].pweight = zero_fraction*unique_path[i].pweight*(unique_depth-i)
                             / static_cast<bst_float>(unique_depth+1);
  }
}

// undo a previous extension of the decision path
inline void UnwindPath(PathElement *unique_path, unsigned unique_depth, unsigned path_index) {
  const bst_float one_fraction = unique_path[path_index].one_fraction;
  const bst_float zero_fraction = unique_path[path_index].zero_fraction;
  bst_float next_one_portion = unique_path[unique_depth].pweight;

  for (int i = unique_depth-1; i >= 0; --i) {
    if (one_fraction != 0) {
      const bst_float tmp = unique_path[i].pweight;
      unique_path[i].pweight = next_one_portion*(unique_depth+1)
                               / static_cast<bst_float>((i+1)*one_fraction);
      next_one_portion = tmp - unique_path[i].pweight*zero_fraction*(unique_depth-i)
                         / static_cast<bst_float>(unique_depth+1);
    } else {
      unique_path[i].pweight = (unique_path[i].pweight*(unique_depth+1))
                               / static_cast<bst_float>(zero_fraction*(unique_depth-i));
    }
  }

  for (int i = path_index; i < unique_depth; ++i) {
    unique_path[i].feature_index = unique_path[i+1].feature_index;
    unique_path[i].zero_fraction = unique_path[i+1].zero_fraction;
    unique_path[i].one_fraction = unique_path[i+1].one_fraction;
  }
}

// determine what the total permutation weight would be if
// we unwound a previous extension in the decision path
inline bst_float UnwoundPathSum(const PathElement *unique_path, unsigned unique_depth,
                                unsigned path_index) {
  const bst_float one_fraction = unique_path[path_index].one_fraction;
  const bst_float zero_fraction = unique_path[path_index].zero_fraction;
  bst_float next_one_portion = unique_path[unique_depth].pweight;
  bst_float total = 0;
  for (int i = unique_depth-1; i >= 0; --i) {
    if (one_fraction != 0) {
      const bst_float tmp = next_one_portion*(unique_depth+1)
                            / static_cast<bst_float>((i+1)*one_fraction);
      total += tmp;
      next_one_portion = unique_path[i].pweight - tmp*zero_fraction*((unique_depth-i)
                         / static_cast<bst_float>(unique_depth+1));
    } else {
      total += (unique_path[i].pweight/zero_fraction)/((unique_depth-i)
               / static_cast<bst_float>(unique_depth+1));
    }
  }
  return total;
}

// recursive computation of SHAP values for a decision tree
inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
                              unsigned node_index, unsigned unique_depth,
                              PathElement *parent_unique_path, bst_float parent_zero_fraction,
                              bst_float parent_one_fraction, int parent_feature_index) const {
  const auto node = (*this)[node_index];

  // extend the unique path
  PathElement *unique_path = parent_unique_path + unique_depth;
  if (unique_depth > 0) std::copy(parent_unique_path, parent_unique_path+unique_depth, unique_path);
  ExtendPath(unique_path, unique_depth, parent_zero_fraction,
             parent_one_fraction, parent_feature_index);
  const unsigned split_index = node.split_index();

  // leaf node
  if (node.is_leaf()) {
    for (int i = 1; i <= unique_depth; ++i) {
      const bst_float w = UnwoundPathSum(unique_path, unique_depth, i);
      const PathElement &el = unique_path[i];
      phi[el.feature_index] += w*(el.one_fraction-el.zero_fraction)*node.leaf_value();
    }

  // internal node
  } else {
    // find which branch is "hot" (meaning x would follow it)
    unsigned hot_index = 0;
    if (feat.is_missing(split_index)) {
      hot_index = node.cdefault();
    } else if (feat.fvalue(split_index) < node.split_cond()) {
      hot_index = node.cleft();
    } else {
      hot_index = node.cright();
    }
    const unsigned cold_index = (hot_index == node.cleft() ? node.cright() : node.cleft());
    const bst_float w = this->stat(node_index).sum_hess;
    const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess/w;
    const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess/w;
    bst_float incoming_zero_fraction = 1;
    bst_float incoming_one_fraction = 1;

    // see if we have already split on this feature,
    // if so we undo that split so we can redo it for this node
    unsigned path_index = 0;
    for (; path_index <= unique_depth; ++path_index) {
      if (unique_path[path_index].feature_index == split_index) break;
    }
    if (path_index != unique_depth+1) {
      incoming_zero_fraction = unique_path[path_index].zero_fraction;
      incoming_one_fraction = unique_path[path_index].one_fraction;
      UnwindPath(unique_path, unique_depth, path_index);
      unique_depth -= 1;
    }

    TreeShap(feat, phi, hot_index, unique_depth+1, unique_path,
             hot_zero_fraction*incoming_zero_fraction, incoming_one_fraction, split_index);

    TreeShap(feat, phi, cold_index, unique_depth+1, unique_path,
             cold_zero_fraction*incoming_zero_fraction, 0, split_index);
  }
}

inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
                                            bst_float *out_contribs) const {
  // find the expected value of the tree's predictions
  bst_float base_value = 0.0;
  bst_float total_cover = 0;
  for (unsigned i = 0; i < (*this).param.num_nodes; ++i) {
    const auto node = (*this)[i];
    if (node.is_leaf()) {
      const auto cover = this->stat(i).sum_hess;
      base_value += cover*node.leaf_value();
      total_cover += cover;
    }
  }
  out_contribs[feat.size()] += base_value / total_cover;

  // Preallocate space for the unique path data
  const int maxd = this->MaxDepth(root_id)+1;
  PathElement *unique_path_data = new PathElement[(maxd*(maxd+1))/2];

  TreeShap(feat, out_contribs, root_id, 0, unique_path_data, 1, 1, -1);
  delete[] unique_path_data;
}

/*! \brief get next position of the tree given current pid */
inline int RegTree::GetNext(int pid, bst_float fvalue, bool is_unknown) const {
  bst_float split_value = (*this)[pid].split_cond();
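Editor's note on the ExtendPath/UnwindPath pair above: pweight[i] holds the permutation weight of path subsets containing i "one" (followed) branches, so UnwindPath must exactly invert a previous ExtendPath. Below is a minimal standalone Python sketch of that recurrence (not part of the patch; names simply mirror the C++ helpers above) showing the round-trip invariant on a tiny example:

class PathElement:
    def __init__(self, feature_index=-1, zero_fraction=0.0, one_fraction=0.0, pweight=0.0):
        self.feature_index = feature_index
        self.zero_fraction = zero_fraction
        self.one_fraction = one_fraction
        self.pweight = pweight

def extend_path(path, depth, zero_fraction, one_fraction, feature_index):
    # mirrors ExtendPath: append one path element and update the permutation weights
    path[depth] = PathElement(feature_index, zero_fraction, one_fraction,
                              1.0 if depth == 0 else 0.0)
    for i in range(depth - 1, -1, -1):
        path[i + 1].pweight += one_fraction * path[i].pweight * (i + 1) / (depth + 1.0)
        path[i].pweight = zero_fraction * path[i].pweight * (depth - i) / (depth + 1.0)

def unwind_path(path, depth, index):
    # mirrors UnwindPath: remove element `index` and restore the earlier weights
    one_fraction = path[index].one_fraction
    zero_fraction = path[index].zero_fraction
    next_one_portion = path[depth].pweight
    for i in range(depth - 1, -1, -1):
        if one_fraction != 0:
            tmp = path[i].pweight
            path[i].pweight = next_one_portion * (depth + 1.0) / ((i + 1) * one_fraction)
            next_one_portion = tmp - path[i].pweight * zero_fraction * (depth - i) / (depth + 1.0)
        else:
            path[i].pweight = path[i].pweight * (depth + 1.0) / (zero_fraction * (depth - i))
    for i in range(index, depth):
        path[i] = path[i + 1]

# round trip: unwinding the element just added restores the previous weights
path = [PathElement() for _ in range(3)]
extend_path(path, 0, 1.0, 1.0, -1)   # root pseudo-element, as TreeShap starts the recursion
extend_path(path, 1, 0.5, 1.0, 0)    # a split whose followed branch carries 50% of the cover
unwind_path(path, 1, 1)
assert abs(path[0].pweight - 1.0) < 1e-12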
@@ -990,7 +990,7 @@ class Booster(object):
        return self.eval_set([(data, name)], iteration)

    def predict(self, data, output_margin=False, ntree_limit=0, pred_leaf=False,
                pred_contribs=False):
                pred_contribs=False, approx_contribs=False):
        """
        Predict with data.

@@ -1018,9 +1018,12 @@ class Booster(object):

        pred_contribs : bool
            When this option is on, the output will be a matrix of (nsample, nfeats+1)
            with each record indicating the feature contributions of all trees. The sum of
            all feature contributions is equal to the prediction. Note that the bias is added
            as the final column, on top of the regular features.
            with each record indicating the feature contributions (SHAP values) for that
            prediction. The sum of all feature contributions is equal to the prediction.
            Note that the bias is added as the final column, on top of the regular features.

        approx_contribs : bool
            Approximate the contributions of each feature

        Returns
        -------
@@ -1033,6 +1036,8 @@ class Booster(object):
            option_mask |= 0x02
        if pred_contribs:
            option_mask |= 0x04
        if approx_contribs:
            option_mask |= 0x08

        self._validate_features(data)
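For reference, a short usage sketch of the new Python flags wired to the 0x04/0x08 bits above (synthetic random data and arbitrary training parameters, purely illustrative): the per-row contributions plus the bias column should sum to the margin prediction, as the docstring states.

import numpy as np
import xgboost as xgb

X = np.random.rand(200, 5)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)
dtrain = xgb.DMatrix(X, label=y)
bst = xgb.train({"objective": "binary:logistic", "max_depth": 3}, dtrain, num_boost_round=10)

contribs = bst.predict(dtrain, pred_contribs=True)   # shape (200, 6): 5 features + bias column
approx = bst.predict(dtrain, pred_contribs=True, approx_contribs=True)  # faster approximation
margin = bst.predict(dtrain, output_margin=True)     # raw log-odds predictions
print(np.abs(contribs.sum(axis=1) - margin).max())   # ~1e-6, i.e. float precision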
@@ -758,7 +758,8 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle,
      (option_mask & 1) != 0,
      &preds, ntree_limit,
      (option_mask & 2) != 0,
      (option_mask & 4) != 0);
      (option_mask & 4) != 0,
      (option_mask & 8) != 0);
  *out_result = dmlc::BeginPtr(preds);
  *len = static_cast<xgboost::bst_ulong>(preds.size());
  API_END();
@@ -224,7 +224,7 @@ class GBLinear : public GradientBooster {

  void PredictContribution(DMatrix* p_fmat,
                           std::vector<bst_float>* out_contribs,
                           unsigned ntree_limit) override {
                           unsigned ntree_limit, bool approximate) override {
    if (model.weight.size() == 0) {
      model.InitModel();
    }
@@ -233,8 +233,8 @@ class GBTree : public GradientBooster {

  void PredictContribution(DMatrix* p_fmat,
                           std::vector<bst_float>* out_contribs,
                           unsigned ntree_limit) override {
    predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit);
                           unsigned ntree_limit, bool approximate) override {
    predictor->PredictContribution(p_fmat, out_contribs, model_, ntree_limit, approximate);
  }

  std::vector<std::string> DumpModel(const FeatureMap& fmap,
@@ -433,9 +433,9 @@ class LearnerImpl : public Learner {

  void Predict(DMatrix* data, bool output_margin,
               std::vector<bst_float>* out_preds, unsigned ntree_limit,
               bool pred_leaf, bool pred_contribs) const override {
               bool pred_leaf, bool pred_contribs, bool approx_contribs) const override {
    if (pred_contribs) {
      gbm_->PredictContribution(data, out_preds, ntree_limit);
      gbm_->PredictContribution(data, out_preds, ntree_limit, approx_contribs);
    } else if (pred_leaf) {
      gbm_->PredictLeaf(data, out_preds, ntree_limit);
    } else {
@@ -206,9 +206,9 @@ class CPUPredictor : public Predictor {
    }
  }

  void PredictContribution(DMatrix* p_fmat,
                           std::vector<bst_float>* out_contribs,
                           const gbm::GBTreeModel& model, unsigned ntree_limit) override {
  void PredictContribution(DMatrix* p_fmat, std::vector<bst_float>* out_contribs,
                           const gbm::GBTreeModel& model, unsigned ntree_limit,
                           bool approximate) override {
    const int nthread = omp_get_max_threads();
    InitThreadTemp(nthread, model.param.num_feature);
    const MetaInfo& info = p_fmat->info();
@@ -225,10 +225,12 @@ class CPUPredictor : public Predictor {
    // make sure contributions is zeroed, we could be reusing a previously
    // allocated one
    std::fill(contribs.begin(), contribs.end(), 0);
    // initialize tree node mean values
#pragma omp parallel for schedule(static)
    for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
      model.trees[i]->FillNodeMeanValues();
    if (approximate) {
      // initialize tree node mean values
#pragma omp parallel for schedule(static)
      for (bst_omp_uint i = 0; i < ntree_limit; ++i) {
        model.trees[i]->FillNodeMeanValues();
      }
    }
    // start collecting the contributions
    dmlc::DataIter<RowBatch>* iter = p_fmat->RowIterator();
@@ -253,7 +255,11 @@ class CPUPredictor : public Predictor {
        if (model.tree_info[j] != gid) {
          continue;
        }
        model.trees[j]->CalculateContributions(feats, root_id, p_contribs);
        if (!approximate) {
          model.trees[j]->CalculateContributions(feats, root_id, p_contribs);
        } else {
          model.trees[j]->CalculateContributionsApprox(feats, root_id, p_contribs);
        }
      }
      feats.Drop(batch[i]);
      // add base margin to BIAS
@@ -384,9 +384,10 @@ class GPUPredictor : public xgboost::Predictor {
  void PredictContribution(DMatrix* p_fmat,
                           std::vector<bst_float>* out_contribs,
                           const gbm::GBTreeModel& model,
                           unsigned ntree_limit) override {
                           unsigned ntree_limit,
                           bool approximate) override {
    cpu_predictor->PredictContribution(p_fmat, out_contribs, model,
                                       ntree_limit);
                                       ntree_limit, approximate);
  }

  void Init(const std::vector<std::pair<std::string, std::string>>& cfg,
@@ -12,6 +12,7 @@ TEST(cpu_predictor, Test) {
  trees.push_back(std::unique_ptr<RegTree>(new RegTree));
  trees.back()->InitModel();
  (*trees.back())[0].set_leaf(1.5f);
  (*trees.back()).stat(0).sum_hess = 1.0f;
  gbm::GBTreeModel model(0.5);
  model.CommitModel(std::move(trees), 0);
  model.param.num_output_group = 1;
@@ -50,5 +51,11 @@ TEST(cpu_predictor, Test) {
  for (int i = 0; i < out_contribution.size(); i++) {
    ASSERT_EQ(out_contribution[i], 1.5);
  }

  // Test predict contribution (approximate method)
  cpu_predictor->PredictContribution(dmat.get(), &out_contribution, model, true);
  for (int i = 0; i < out_contribution.size(); i++) {
    ASSERT_EQ(out_contribution[i], 1.5);
  }
}
}  // namespace xgboost
@@ -19,6 +19,7 @@ TEST(gpu_predictor, Test) {
  trees.push_back(std::unique_ptr<RegTree>());
  trees.back()->InitModel();
  (*trees.back())[0].set_leaf(1.5f);
  (*trees.back()).stat(0).sum_hess = 1.0f;
  gbm::GBTreeModel model(0.5);
  model.CommitModel(std::move(trees), 0);
  model.param.num_output_group = 1;
@@ -291,3 +291,18 @@ def test_contributions():

    for max_depth, num_rounds in itertools.product(range(0, 3), range(1, 5)):
        yield test_fn, max_depth, num_rounds

    # check that we get the right SHAP values for a basic AND example
    # (https://arxiv.org/abs/1706.06060)
    X = np.zeros((4, 2))
    X[0, :] = 1
    X[1, 0] = 1
    X[2, 1] = 1
    y = np.zeros(4)
    y[0] = 1
    param = {"max_depth": 2, "base_score": 0.0, "eta": 1.0, "lambda": 0}
    bst = xgb.train(param, xgb.DMatrix(X, label=y), 1)
    out = bst.predict(xgb.DMatrix(X[0:1, :]), pred_contribs=True)
    assert out[0, 0] == 0.375
    assert out[0, 1] == 0.375
    assert out[0, 2] == 0.25
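Editor's sanity check on those constants (not part of the patch, and assuming the depth-2 tree learns the AND function exactly, which the assertions above imply): a brute-force Shapley computation over the four background rows reproduces 0.375 and 0.375 for the two features and 0.25 for the expected value carried in the bias column.

import numpy as np

X = np.zeros((4, 2))
X[0, :] = 1
X[1, 0] = 1
X[2, 1] = 1
f = lambda a, b: float(a and b)  # the AND function the tree is expected to learn

def value(subset, x):
    # mean model output with the features in `subset` fixed to x and the rest
    # drawn from the four background rows (all with unit hessian weight here)
    total = 0.0
    for row in X:
        z = [x[j] if j in subset else row[j] for j in (0, 1)]
        total += f(z[0], z[1])
    return total / len(X)

x = X[0]  # the instance (1, 1) used in bst.predict above
phi0 = 0.5 * ((value({0}, x) - value(set(), x)) + (value({0, 1}, x) - value({1}, x)))
phi1 = 0.5 * ((value({1}, x) - value(set(), x)) + (value({0, 1}, x) - value({0}, x)))
print(phi0, phi1, value(set(), x))  # 0.375 0.375 0.25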