[R] maintenance Nov 2017; SHAP plots (#2888)
* [R] fix predict contributions for data with no colnames
* [R] add a render parameter for xgb.plot.multi.trees; fixes #2628
* [R] update Rd's
* [R] remove unnecessary dep-package from R cmake install
* silence type warnings; readability
* [R] silence complaint about incomplete line at the end
* [R] initial version of xgb.plot.shap()
* [R] more work on xgb.plot.shap
* [R] enforce black font in xgb.plot.tree; fixes #2640
* [R] if feature names are available, check in predict that they are the same; fixes #2857
* [R] cran check and lint fixes
* remove tabs
* [R] add references; a test for plot.shap
parent 1b77903eeb
commit e8a6597957
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 0.6.4.7
-Date: 2017-09-25
+Version: 0.6.4.8
+Date: 2017-12-05
 Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
     Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
     Yuan Tang <terrytangyuan@gmail.com>

@@ -40,6 +40,7 @@ export(xgb.model.dt.tree)
 export(xgb.plot.deepness)
 export(xgb.plot.importance)
 export(xgb.plot.multi.trees)
+export(xgb.plot.shap)
 export(xgb.plot.tree)
 export(xgb.save)
 export(xgb.save.raw)
@@ -60,9 +61,12 @@ importFrom(data.table,rbindlist)
 importFrom(data.table,setkey)
 importFrom(data.table,setkeyv)
 importFrom(data.table,setnames)
+importFrom(grDevices,rgb)
 importFrom(graphics,barplot)
 importFrom(graphics,grid)
+importFrom(graphics,lines)
 importFrom(graphics,par)
+importFrom(graphics,points)
 importFrom(graphics,title)
 importFrom(magrittr,"%>%")
 importFrom(stats,median)

@@ -150,7 +150,7 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #' Setting \code{predcontrib = TRUE} allows to calculate contributions of each feature to
 #' individual predictions. For "gblinear" booster, feature contributions are simply linear terms
 #' (feature_beta * feature_value). For "gbtree" booster, feature contributions are SHAP
-#' values (https://arxiv.org/abs/1706.06060) that sum to the difference between the expected output
+#' values (Lundberg 2017) that sum to the difference between the expected output
 #' of the model and the current prediction (where the hessian weights are used to compute the expectations).
 #' Setting \code{approxcontrib = TRUE} approximates these values following the idea explained
 #' in \url{http://blog.datadive.net/interpreting-random-forests/}.
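
A minimal sketch of the documented contract (not part of the commit; data and parameter values are illustrative). Per-feature contributions plus the "BIAS" column sum to the margin prediction, and for "gblinear" each contribution is literally feature_beta * feature_value:

    library(xgboost)
    data(agaricus.train, package = 'xgboost')
    bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
                   booster = "gblinear", nrounds = 10, objective = "binary:logistic",
                   nthread = 2, verbose = 0)
    contr <- predict(bst, agaricus.train$data, predcontrib = TRUE)  # nsamples x (nfeatures + 1)
    marg  <- predict(bst, agaricus.train$data, outputmargin = TRUE)
    summary(rowSums(contr) - marg)  # ~0 up to float precision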
@@ -173,6 +173,12 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
 #' @seealso
 #' \code{\link{xgb.train}}.
 #'
+#' @references
+#'
+#' Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874}
+#'
+#' Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", \url{https://arxiv.org/abs/1706.06060}
+#'
 #' @examples
 #' ## binary classification:
 #'
@@ -265,6 +271,10 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
   object <- xgb.Booster.complete(object, saveraw = FALSE)
   if (!inherits(newdata, "xgb.DMatrix"))
     newdata <- xgb.DMatrix(newdata, missing = missing)
+  if (!is.null(object[["feature_names"]]) &&
+      !is.null(colnames(newdata)) &&
+      !identical(object[["feature_names"]], colnames(newdata)))
+    stop("Feature names stored in `object` and `newdata` are different!")
   if (is.null(ntreelimit))
     ntreelimit <- NVL(object$best_ntreelimit, 0)
   if (NVL(object$params[['booster']], '') == 'gblinear')
@@ -292,7 +302,7 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
   } else if (predcontrib) {
     n_col1 <- ncol(newdata) + 1
     n_group <- npred_per_case / n_col1
-    dnames <- list(NULL, c(colnames(newdata), "BIAS"))
+    dnames <- if (!is.null(colnames(newdata))) list(NULL, c(colnames(newdata), "BIAS")) else NULL
    ret <- if (n_ret == n_row) {
       matrix(ret, ncol = 1, dimnames = dnames)
     } else if (n_group == 1) {
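
Taken together, the two changes above tighten predict() on named data and relax it on unnamed data: mismatched feature names now fail fast, while a matrix with no column names no longer breaks predcontrib (the fix the commit message refers to). A hedged sketch, reusing bst and the agaricus data from the example above:

    X <- agaricus.train$data
    colnames(X) <- NULL
    contr <- predict(bst, X, predcontrib = TRUE)  # no longer errors on unnamed data
    colnames(contr)                               # NULL - nothing to label the columns with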

@@ -136,4 +136,4 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
-globalVariables(c(".", ".N", "Gain", "Cover", "Frequency", "Feature"))
+globalVariables(c(".", ".N", "Gain", "Cover", "Frequency", "Feature", "Class"))

@@ -7,10 +7,9 @@
 #' @param features_keep number of features to keep in each position of the multi trees.
 #' @param plot_width width in pixels of the graph to produce
 #' @param plot_height height in pixels of the graph to produce
+#' @param render a logical flag for whether the graph should be rendered (see Value).
 #' @param ... currently not used
 #'
-#' @return Two graphs showing the distribution of the model deepness.
-#'
 #' @details
 #'
 #' This function tries to capture the complexity of a gradient boosted tree model
@@ -30,26 +29,46 @@
 #' This function is inspired by this blog post:
 #' \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
 #'
+#' @return
+#'
+#' When \code{render = TRUE}:
+#' returns a rendered graph object which is an \code{htmlwidget} of class \code{grViz}.
+#' Similar to ggplot objects, it needs to be printed to see it when not running from command line.
+#'
+#' When \code{render = FALSE}:
+#' silently returns a graph object which is of DiagrammeR's class \code{dgr_graph}.
+#' This could be useful if one wants to modify some of the graph attributes
+#' before rendering the graph with \code{\link[DiagrammeR]{render_graph}}.
+#'
 #' @examples
+#'
 #' data(agaricus.train, package='xgboost')
 #'
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
 #'                eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
-#'                min_child_weight = 50)
+#'                min_child_weight = 50, verbose = 0)
 #'
-#' p <- xgb.plot.multi.trees(model = bst, feature_names = colnames(agaricus.train$data),
-#'                           features_keep = 3)
+#' p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
 #' print(p)
 #'
+#' \dontrun{
+#' # Below is an example of how to save this plot to a file.
+#' # Note that for `export_graph` to work, the DiagrammeRsvg and rsvg packages must also be installed.
+#' library(DiagrammeR)
+#' gr <- xgb.plot.multi.trees(model=bst, features_keep = 3, render=FALSE)
+#' export_graph(gr, 'tree.pdf', width=1500, height=600)
+#' }
+#'
 #' @export
-xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, plot_width = NULL, plot_height = NULL, ...){
+xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, plot_width = NULL, plot_height = NULL,
+                                 render = TRUE, ...){
   check.deprecation(...)
   tree.matrix <- xgb.model.dt.tree(feature_names = feature_names, model = model)

   # first number of the path represents the tree, then the following numbers are related to the path to follow
   # root init
   root.nodes <- tree.matrix[stri_detect_regex(ID, "\\d+-0"), ID]
-  tree.matrix[ID %in% root.nodes, abs.node.position:=root.nodes]
+  tree.matrix[ID %in% root.nodes, abs.node.position := root.nodes]

   precedent.nodes <- root.nodes

@@ -64,9 +83,8 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
     precedent.nodes <- c(yes.nodes.abs.pos, no.nodes.abs.pos)
   }

-  tree.matrix[!is.na(Yes),Yes:= paste0(abs.node.position, "_0")]
-  tree.matrix[!is.na(No),No:= paste0(abs.node.position, "_1")]
+  tree.matrix[!is.na(Yes), Yes := paste0(abs.node.position, "_0")]
+  tree.matrix[!is.na(No), No := paste0(abs.node.position, "_1")]

-
   remove.tree <- . %>% stri_replace_first_regex(pattern = "^\\d+-", replacement = "")

@@ -121,6 +139,8 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
                                attr = c("color", "arrowsize", "arrowhead", "fontname"),
                                value = c("DimGray", "1.5", "vee", "Helvetica"))

+  if (!render) return(invisible(graph))
+
   DiagrammeR::render_graph(graph, width = plot_width, height = plot_height)
 }
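
A short usage sketch for the new flag (illustrative only; bst as in the roxygen example above):

    gr <- xgb.plot.multi.trees(model = bst, features_keep = 3, render = FALSE)
    class(gr)                                  # "dgr_graph" - a plain DiagrammeR graph
    DiagrammeR::render_graph(gr, width = 800)  # render later, e.g. after editing attributes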

R-package/R/xgb.plot.shap.R (new file, 214 lines)
@@ -0,0 +1,214 @@
+#' SHAP contribution dependency plots
+#'
+#' Visualizing the SHAP feature contribution to prediction dependencies on feature value.
+#'
+#' @param data data as a \code{matrix} or \code{dgCMatrix}.
+#' @param shap_contrib a matrix of SHAP contributions that was computed earlier for the above
+#'        \code{data}. When it is NULL, it is computed internally using \code{model} and \code{data}.
+#' @param features a vector of either column indices or of feature names to plot. When it is NULL,
+#'        feature importance is calculated, and \code{top_n} high ranked features are taken.
+#' @param top_n when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.
+#' @param model an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib}
+#'        or \code{features} is missing.
+#' @param trees passed to \code{\link{xgb.importance}} when \code{features = NULL}.
+#' @param target_class is only relevant for multiclass models. When it is set to a 0-based class index,
+#'        only SHAP contributions for that specific class are used.
+#'        If it is not set, SHAP importances are averaged over all classes.
+#' @param approxcontrib passed to \code{\link{predict.xgb.Booster}} when \code{shap_contrib = NULL}.
+#' @param subsample a random fraction of data points to use for plotting. When it is NULL,
+#'        it is set so that up to 100K data points are used.
+#' @param n_col a number of columns in a grid of plots.
+#' @param col color of the scatterplot markers.
+#' @param pch scatterplot marker.
+#' @param discrete_n_uniq a maximal number of unique values in a feature to consider it as discrete.
+#' @param discrete_jitter an \code{amount} parameter of jitter added to discrete features' positions.
+#' @param ylab a y-axis label in 1D plots.
+#' @param plot_NA whether the contributions of cases with missing values should also be plotted.
+#' @param col_NA a color of marker for missing value contributions.
+#' @param pch_NA a marker type for NA values.
+#' @param pos_NA a relative position of the x-location where NA values are shown:
+#'        \code{min(x) + (max(x) - min(x)) * pos_NA}.
+#' @param plot_loess whether to plot loess-smoothed curves. The smoothing is only done for features with
+#'        more than 5 distinct values.
+#' @param col_loess a color to use for the loess curves.
+#' @param span_loess the \code{span} parameter in \code{\link[stats]{loess}}'s call.
+#' @param which whether to do univariate or bivariate plotting. NOTE: only 1D is implemented so far.
+#' @param plot whether a plot should be drawn. If FALSE, only a list of matrices is returned.
+#' @param ... other parameters passed to \code{plot}.
+#'
+#' @details
+#'
+#' These scatterplots represent how SHAP feature contributions depend on feature values.
+#' The similarity to partial dependency plots is that they also give an idea for how feature values
+#' affect predictions. However, in partial dependency plots, we usually see marginal dependencies
+#' of model prediction on feature value, while SHAP contribution dependency plots display the estimated
+#' contributions of a feature to model prediction for each individual case.
+#'
+#' When \code{plot_loess = TRUE} is set, feature values are rounded to 3 significant digits and
+#' weighted LOESS is computed and plotted, where weights are the numbers of data points
+#' at each rounded value.
+#'
+#' Note: SHAP contributions are shown on the scale of model margin. E.g., for a logistic binomial objective,
+#' the margin is prediction before a sigmoidal transform into probability-like values.
+#' Also, since SHAP stands for "SHapley Additive exPlanation" (model prediction = sum of SHAP
+#' contributions for all features + bias), depending on the objective used, transforming SHAP
+#' contributions for a feature from the marginal to the prediction space is not necessarily
+#' a meaningful thing to do.
+#'
+#' @return
+#'
+#' In addition to producing plots (when \code{plot=TRUE}), it silently returns a list of two matrices:
+#' \itemize{
+#'   \item \code{data} the values of selected features;
+#'   \item \code{shap_contrib} the contributions of selected features.
+#' }
+#'
+#' @references
+#'
+#' Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874}
+#'
+#' Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", \url{https://arxiv.org/abs/1706.06060}
+#'
+#' @examples
+#'
+#' data(agaricus.train, package='xgboost')
+#' data(agaricus.test, package='xgboost')
+#'
+#' bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
+#'                eta = 0.1, max_depth = 3, subsample = .5,
+#'                method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
+#'
+#' xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
+#' contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
+#' xgb.plot.shap(agaricus.test$data, contr, model = bst, top_n = 12, n_col = 3)
+#'
+#' # multiclass example - plots for each class separately:
+#' nclass <- 3
+#' nrounds <- 20
+#' x <- as.matrix(iris[, -5])
+#' set.seed(123)
+#' is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
+#' mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
+#'                 max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
+#'                 objective = "multi:softprob", num_class = nclass, verbose = 0)
+#' trees0 <- seq(from=0, by=nclass, length.out=nrounds)
+#' col <- rgb(0, 0, 1, 0.5)
+#' xgb.plot.shap(x, model = mbst, trees = trees0, target_class = 0, top_n = 4, n_col = 2, col = col, pch = 16, pch_NA = 17)
+#' xgb.plot.shap(x, model = mbst, trees = trees0 + 1, target_class = 1, top_n = 4, n_col = 2, col = col, pch = 16, pch_NA = 17)
+#' xgb.plot.shap(x, model = mbst, trees = trees0 + 2, target_class = 2, top_n = 4, n_col = 2, col = col, pch = 16, pch_NA = 17)
+#'
+#' @rdname xgb.plot.shap
+#' @export
+xgb.plot.shap <- function(data, shap_contrib = NULL, features = NULL, top_n = 1, model = NULL,
+                          trees = NULL, target_class = NULL, approxcontrib = FALSE,
+                          subsample = NULL, n_col = 1, col = rgb(0, 0, 1, 0.2), pch = '.',
+                          discrete_n_uniq = 5, discrete_jitter = 0.01, ylab = "SHAP",
+                          plot_NA = TRUE, col_NA = rgb(0.7, 0, 1, 0.6), pch_NA = '.', pos_NA = 1.07,
+                          plot_loess = TRUE, col_loess = 2, span_loess = 0.5,
+                          which = c("1d", "2d"), plot = TRUE, ...) {
+
+  if (!is.matrix(data) && !inherits(data, "dgCMatrix"))
+    stop("data: must be either matrix or dgCMatrix")
+
+  if (is.null(shap_contrib) && (is.null(model) || !inherits(model, "xgb.Booster")))
+    stop("when shap_contrib is not provided, one must provide an xgb.Booster model")
+
+  if (is.null(features) && (is.null(model) || !inherits(model, "xgb.Booster")))
+    stop("when features are not provided, one must provide an xgb.Booster model to rank the features")
+
+  if (!is.null(shap_contrib) &&
+      (!is.matrix(shap_contrib) || nrow(shap_contrib) != nrow(data) || ncol(shap_contrib) != ncol(data) + 1))
+    stop("shap_contrib is not compatible with the provided data")
+
+  nsample <- if (is.null(subsample)) min(100000, nrow(data)) else as.integer(subsample * nrow(data))
+  idx <- sample(1:nrow(data), nsample)
+  data <- data[idx,]
+
+  if (is.null(shap_contrib)) {
+    shap_contrib <- predict(model, data, predcontrib = TRUE, approxcontrib = approxcontrib)
+  } else {
+    shap_contrib <- shap_contrib[idx,]
+  }
+
+  which <- match.arg(which)
+  if (which == "2d")
+    stop("2D plots are not implemented yet")
+
+  if (is.null(features)) {
+    imp <- xgb.importance(model = model, trees = trees)
+    top_n <- as.integer(top_n[1])
+    if (top_n < 1 || top_n > 100)
+      stop("top_n: must be an integer within [1, 100]")
+    features <- imp$Feature[1:min(top_n, NROW(imp))]
+  }
+
+  if (is.character(features)) {
+    if (is.null(colnames(data)))
+      stop("Either provide `data` with column names or provide `features` as column indices")
+    features <- match(features, colnames(data))
+  }
+
+  if (n_col > length(features)) n_col <- length(features)
+
+  if (is.list(shap_contrib)) { # multiclass: either choose a class or merge
+    shap_contrib <- if (!is.null(target_class)) {
+      shap_contrib[[target_class + 1]]
+    } else {
+      Reduce("+", lapply(shap_contrib, abs))
+    }
+  }
+
+  shap_contrib <- shap_contrib[, features, drop = FALSE]
+  data <- data[, features, drop = FALSE]
+  cols <- colnames(data)
+  if (is.null(cols)) cols <- colnames(shap_contrib)
+  if (is.null(cols)) cols <- paste0('X', 1:ncol(data))
+  colnames(data) <- cols
+  colnames(shap_contrib) <- cols
+
+  if (plot && which == "1d") {
+    op <- par(mfrow = c(ceiling(length(features) / n_col), n_col),
+              oma = c(0,0,0,0) + 0.2,
+              mar = c(3.5,3.5,0,0) + 0.1,
+              mgp = c(1.7, 0.6, 0))
+    for (f in cols) {
+      ord <- order(data[, f])
+      x <- data[, f][ord]
+      y <- shap_contrib[, f][ord]
+      x_lim <- range(x, na.rm = TRUE)
+      y_lim <- range(y, na.rm = TRUE)
+      do_na <- plot_NA && any(is.na(x))
+      if (do_na) {
+        x_range <- diff(x_lim)
+        loc_na <- min(x, na.rm = TRUE) + x_range * pos_NA
+        x_lim <- range(c(x_lim, loc_na))
+      }
+      x_uniq <- unique(x)
+      x2plot <- x
+      # add small jitter for discrete features with <= 5 distinct values
+      if (length(x_uniq) <= discrete_n_uniq)
+        x2plot <- jitter(x, amount = discrete_jitter * min(diff(x_uniq), na.rm = TRUE))
+      plot(x2plot, y, pch = pch, xlab = f, col = col, xlim = x_lim, ylim = y_lim, ylab = ylab, ...)
+      grid()
+      if (plot_loess) {
+        # compress x to 3 digits, and mean-aggregate y
+        zz <- data.table(x = signif(x, 3), y)[, .(.N, y=mean(y)), x]
+        if (nrow(zz) <= 5) {
+          lines(zz$x, zz$y, col = col_loess)
+        } else {
+          lo <- stats::loess(y ~ x, data = zz, weights = zz$N, span = span_loess)
+          zz$y_lo <- predict(lo, zz, type = "link")
+          lines(zz$x, zz$y_lo, col = col_loess)
+        }
+      }
+      if (do_na) {
+        i_na <- which(is.na(x))
+        x_na <- rep(loc_na, length(i_na))
+        x_na <- jitter(x_na, amount = x_range * 0.01)
+        points(x_na, y[i_na], pch = pch_NA, col = col_NA)
+      }
+    }
+    par(op)
+  }
+  if (plot && which == "2d") {
+    # TODO
+  }
+  invisible(list(data = data, shap_contrib = shap_contrib))
+}
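
The plot_loess branch above smooths an aggregate rather than the raw points. A standalone sketch of just that aggregation step (x and y below are hypothetical stand-ins for one feature column and its SHAP contributions):

    library(data.table)
    set.seed(1)
    x <- rnorm(1000)
    y <- x^2 + rnorm(1000, sd = 0.1)
    # round x to 3 significant digits and mean-aggregate y,
    # keeping the per-value counts .N to serve as loess weights
    zz <- data.table(x = signif(x, 3), y = y)[, .(.N, y = mean(y)), by = x]
    lo <- stats::loess(y ~ x, data = zz, weights = zz$N, span = 0.5)
    o <- order(zz$x)
    plot(x, y, pch = '.')
    lines(zz$x[o], predict(lo, zz)[o], col = 2)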

@@ -95,7 +95,8 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
                            label = dt$label,
                            fillcolor = dt$filledcolor,
                            shape = dt$shape,
-                           data = dt$Feature)
+                           data = dt$Feature,
+                           fontcolor = "black")

   edges <- DiagrammeR::create_edge_df(
     from = match(dt[Feature != "Leaf", c(ID)] %>% rep(2), dt$ID),

@@ -169,6 +169,11 @@
 #' \code{\link{predict.xgb.Booster}},
 #' \code{\link{xgb.cv}}
 #'
+#' @references
+#'
+#' Tianqi Chen and Carlos Guestrin, "XGBoost: A Scalable Tree Boosting System",
+#' 22nd SIGKDD Conference on Knowledge Discovery and Data Mining, 2016, \url{https://arxiv.org/abs/1603.02754}
+#'
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')

@@ -100,9 +100,12 @@ NULL
 #' @importFrom stats median
 #' @importFrom utils head
 #' @importFrom graphics barplot
+#' @importFrom graphics lines
+#' @importFrom graphics points
 #' @importFrom graphics grid
 #' @importFrom graphics par
 #' @importFrom graphics title
+#' @importFrom grDevices rgb
 #'
 #' @import methods
 #' @useDynLib xgboost, .registration = TRUE

@@ -11,3 +11,4 @@ early_stopping Early Stop in training
 poisson_regression   Poisson Regression on count data
 tweedie_regression   Tweedie Regression
 gpu_accelerated      GPU-accelerated tree building algorithms
+
@@ -7,7 +7,7 @@
 \usage{
 \method{predict}{xgb.Booster}(object, newdata, missing = NA,
   outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE,
-  predcontrib = FALSE, reshape = FALSE, ...)
+  predcontrib = FALSE, approxcontrib = FALSE, reshape = FALSE, ...)

 \method{predict}{xgb.Booster.handle}(object, ...)
 }
@@ -30,6 +30,8 @@ It will use all the trees by default (\code{NULL} value).}

 \item{predcontrib}{whether to return feature contributions to individual predictions instead (see Details).}

+\item{approxcontrib}{whether to use a fast approximation for feature contributions (see Details).}
+
 \item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several
 prediction outputs per case. This option has no effect when \code{predleaf = TRUE}.}

@@ -69,10 +71,11 @@ e.g., as implemented in \code{\link{xgb.create.features}}.

 Setting \code{predcontrib = TRUE} allows to calculate contributions of each feature to
 individual predictions. For "gblinear" booster, feature contributions are simply linear terms
-(feature_beta * feature_value). For "gbtree" booster, feature contribution is calculated
-as a sum of average contribution of that feature's split nodes across all trees to an
-individual prediction, following the idea explained in
-\url{http://blog.datadive.net/interpreting-random-forests/}.
+(feature_beta * feature_value). For "gbtree" booster, feature contributions are SHAP
+values (Lundberg 2017) that sum to the difference between the expected output
+of the model and the current prediction (where the hessian weights are used to compute the expectations).
+Setting \code{approxcontrib = TRUE} approximates these values following the idea explained
+in \url{http://blog.datadive.net/interpreting-random-forests/}.
 }
 \examples{
 ## binary classification:
@@ -98,7 +101,7 @@ str(pred_leaf)
 # the result is an nsamples X (nfeatures + 1) matrix
 pred_contr <- predict(bst, test$data, predcontrib = TRUE)
 str(pred_contr)
-# verify that contributions' sums are equal to log-odds of predictions (up to foat precision):
+# verify that contributions' sums are equal to log-odds of predictions (up to float precision):
 summary(rowSums(pred_contr) - qlogis(pred))
 # for the 1st record, let's inspect its features that had non-zero contribution to prediction:
 contr1 <- pred_contr[1,]
@@ -158,6 +161,11 @@ err <- sapply(1:25, function(n) {
 })
 plot(err, type='l', ylim=c(0,0.1), xlab='#trees')
+
+}
+\references{
+Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874}
+
+Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", \url{https://arxiv.org/abs/1706.06060}
 }
 \seealso{
 \code{\link{xgb.train}}.

@@ -5,7 +5,7 @@
 \title{Project all trees on one tree and plot it}
 \usage{
 xgb.plot.multi.trees(model, feature_names = NULL, features_keep = 5,
-  plot_width = NULL, plot_height = NULL, ...)
+  plot_width = NULL, plot_height = NULL, render = TRUE, ...)
 }
 \arguments{
 \item{model}{produced by the \code{xgb.train} function.}
@@ -18,10 +18,19 @@ xgb.plot.multi.trees(model, feature_names = NULL, features_keep = 5,

 \item{plot_height}{height in pixels of the graph to produce}

+\item{render}{a logical flag for whether the graph should be rendered (see Value).}
+
 \item{...}{currently not used}
 }
 \value{
-Two graphs showing the distribution of the model deepness.
+When \code{render = TRUE}:
+returns a rendered graph object which is an \code{htmlwidget} of class \code{grViz}.
+Similar to ggplot objects, it needs to be printed to see it when not running from command line.
+
+When \code{render = FALSE}:
+silently returns a graph object which is of DiagrammeR's class \code{dgr_graph}.
+This could be useful if one wants to modify some of the graph attributes
+before rendering the graph with \code{\link[DiagrammeR]{render_graph}}.
 }
 \description{
 Visualization of the ensemble of trees as a single collective unit.
@@ -45,14 +54,22 @@ This function is inspired by this blog post:
 \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
 }
 \examples{
+
 data(agaricus.train, package='xgboost')

 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 15,
                eta = 1, nthread = 2, nrounds = 30, objective = "binary:logistic",
-               min_child_weight = 50)
+               min_child_weight = 50, verbose = 0)

-p <- xgb.plot.multi.trees(model = bst, feature_names = colnames(agaricus.train$data),
-                          features_keep = 3)
+p <- xgb.plot.multi.trees(model = bst, features_keep = 3)
 print(p)

+\dontrun{
+# Below is an example of how to save this plot to a file.
+# Note that for `export_graph` to work, the DiagrammeRsvg and rsvg packages must also be installed.
+library(DiagrammeR)
+gr <- xgb.plot.multi.trees(model=bst, features_keep = 3, render=FALSE)
+export_graph(gr, 'tree.pdf', width=1500, height=600)
+}
+
 }

R-package/man/xgb.plot.shap.Rd (new file, 135 lines)
@@ -0,0 +1,135 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/xgb.plot.shap.R
+\name{xgb.plot.shap}
+\alias{xgb.plot.shap}
+\title{SHAP contribution dependency plots}
+\usage{
+xgb.plot.shap(data, shap_contrib = NULL, features = NULL, top_n = 1,
+  model = NULL, trees = NULL, target_class = NULL,
+  approxcontrib = FALSE, subsample = NULL, n_col = 1, col = rgb(0, 0, 1,
+  0.2), pch = ".", discrete_n_uniq = 5, discrete_jitter = 0.01,
+  ylab = "SHAP", plot_NA = TRUE, col_NA = rgb(0.7, 0, 1, 0.6),
+  pch_NA = ".", pos_NA = 1.07, plot_loess = TRUE, col_loess = 2,
+  span_loess = 0.5, which = c("1d", "2d"), plot = TRUE, ...)
+}
+\arguments{
+\item{data}{data as a \code{matrix} or \code{dgCMatrix}.}
+
+\item{shap_contrib}{a matrix of SHAP contributions that was computed earlier for the above
+\code{data}. When it is NULL, it is computed internally using \code{model} and \code{data}.}
+
+\item{features}{a vector of either column indices or of feature names to plot. When it is NULL,
+feature importance is calculated, and \code{top_n} high ranked features are taken.}
+
+\item{top_n}{when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.}
+
+\item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib}
+or \code{features} is missing.}
+
+\item{trees}{passed to \code{\link{xgb.importance}} when \code{features = NULL}.}
+
+\item{target_class}{is only relevant for multiclass models. When it is set to a 0-based class index,
+only SHAP contributions for that specific class are used.
+If it is not set, SHAP importances are averaged over all classes.}
+
+\item{approxcontrib}{passed to \code{\link{predict.xgb.Booster}} when \code{shap_contrib = NULL}.}
+
+\item{subsample}{a random fraction of data points to use for plotting. When it is NULL,
+it is set so that up to 100K data points are used.}
+
+\item{n_col}{a number of columns in a grid of plots.}
+
+\item{col}{color of the scatterplot markers.}
+
+\item{pch}{scatterplot marker.}
+
+\item{discrete_n_uniq}{a maximal number of unique values in a feature to consider it as discrete.}
+
+\item{discrete_jitter}{an \code{amount} parameter of jitter added to discrete features' positions.}
+
+\item{ylab}{a y-axis label in 1D plots.}
+
+\item{plot_NA}{whether the contributions of cases with missing values should also be plotted.}
+
+\item{col_NA}{a color of marker for missing value contributions.}
+
+\item{pch_NA}{a marker type for NA values.}
+
+\item{pos_NA}{a relative position of the x-location where NA values are shown:
+\code{min(x) + (max(x) - min(x)) * pos_NA}.}
+
+\item{plot_loess}{whether to plot loess-smoothed curves. The smoothing is only done for features with
+more than 5 distinct values.}
+
+\item{col_loess}{a color to use for the loess curves.}
+
+\item{span_loess}{the \code{span} parameter in \code{\link[stats]{loess}}'s call.}
+
+\item{which}{whether to do univariate or bivariate plotting. NOTE: only 1D is implemented so far.}
+
+\item{plot}{whether a plot should be drawn. If FALSE, only a list of matrices is returned.}
+
+\item{...}{other parameters passed to \code{plot}.}
+}
+\value{
+In addition to producing plots (when \code{plot=TRUE}), it silently returns a list of two matrices:
+\itemize{
+  \item \code{data} the values of selected features;
+  \item \code{shap_contrib} the contributions of selected features.
+}
+}
+\description{
+Visualizing the SHAP feature contribution to prediction dependencies on feature value.
+}
+\details{
+These scatterplots represent how SHAP feature contributions depend on feature values.
+The similarity to partial dependency plots is that they also give an idea for how feature values
+affect predictions. However, in partial dependency plots, we usually see marginal dependencies
+of model prediction on feature value, while SHAP contribution dependency plots display the estimated
+contributions of a feature to model prediction for each individual case.
+
+When \code{plot_loess = TRUE} is set, feature values are rounded to 3 significant digits and
+weighted LOESS is computed and plotted, where weights are the numbers of data points
+at each rounded value.
+
+Note: SHAP contributions are shown on the scale of model margin. E.g., for a logistic binomial objective,
+the margin is prediction before a sigmoidal transform into probability-like values.
+Also, since SHAP stands for "SHapley Additive exPlanation" (model prediction = sum of SHAP
+contributions for all features + bias), depending on the objective used, transforming SHAP
+contributions for a feature from the marginal to the prediction space is not necessarily
+a meaningful thing to do.
+}
+\examples{
+
+data(agaricus.train, package='xgboost')
+data(agaricus.test, package='xgboost')
+
+bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
+               eta = 0.1, max_depth = 3, subsample = .5,
+               method = "hist", objective = "binary:logistic", nthread = 2, verbose = 0)
+
+xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
+contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
+xgb.plot.shap(agaricus.test$data, contr, model = bst, top_n = 12, n_col = 3)
+
+# multiclass example - plots for each class separately:
+nclass <- 3
+nrounds <- 20
+x <- as.matrix(iris[, -5])
+set.seed(123)
+is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values
+mbst <- xgboost(data = x, label = as.numeric(iris$Species) - 1, nrounds = nrounds,
+                max_depth = 2, eta = 0.3, subsample = .5, nthread = 2,
+                objective = "multi:softprob", num_class = nclass, verbose = 0)
+trees0 <- seq(from=0, by=nclass, length.out=nrounds)
+col <- rgb(0, 0, 1, 0.5)
+xgb.plot.shap(x, model = mbst, trees = trees0, target_class = 0, top_n = 4, n_col = 2, col = col, pch = 16, pch_NA = 17)
+xgb.plot.shap(x, model = mbst, trees = trees0 + 1, target_class = 1, top_n = 4, n_col = 2, col = col, pch = 16, pch_NA = 17)
+xgb.plot.shap(x, model = mbst, trees = trees0 + 2, target_class = 2, top_n = 4, n_col = 2, col = col, pch = 16, pch_NA = 17)
+
+}
+\references{
+Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions", NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874}
+
+Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles", \url{https://arxiv.org/abs/1706.06060}
+}

@@ -258,6 +258,10 @@ bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
                objective = "binary:logistic")
 pred <- predict(bst, agaricus.test$data)
+
+}
+\references{
+Tianqi Chen and Carlos Guestrin, "XGBoost: A Scalable Tree Boosting System",
+22nd SIGKDD Conference on Knowledge Discovery and Data Mining, 2016, \url{https://arxiv.org/abs/1603.02754}
 }
 \seealso{
 \code{\link{callbacks}},

@@ -81,6 +81,11 @@ test_that("predict feature contributions works", {
   expect_equal(colnames(pred_contr), c(colnames(sparse_matrix), "BIAS"))
   pred <- predict(bst.Tree, sparse_matrix, outputmargin = TRUE)
   expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
+  # must work with data that has no column names
+  X <- sparse_matrix
+  colnames(X) <- NULL
+  expect_error(pred_contr_ <- predict(bst.Tree, X, predcontrib = TRUE), regexp = NA)
+  expect_equal(pred_contr, pred_contr_, check.attributes = FALSE)

   # gbtree binary classifier (approximate method)
   expect_error(pred_contr <- predict(bst.Tree, sparse_matrix, predcontrib = TRUE, approxcontrib = TRUE), regexp = NA)
@@ -289,6 +294,13 @@ test_that("xgb.plot.deepness works", {
   xgb.ggplot.deepness(model = bst.Tree)
 })

+test_that("xgb.plot.shap works", {
+  sh <- xgb.plot.shap(data = sparse_matrix, model = bst.Tree, top_n = 2, col = 4)
+  expect_equal(names(sh), c("data", "shap_contrib"))
+  expect_equal(NCOL(sh$data), 2)
+  expect_equal(NCOL(sh$shap_contrib), 2)
+})
+
 test_that("check.deprecation works", {
   ttt <- function(a = NNULL, DUMMY=NULL, ...) {
     check.deprecation(...)

@@ -78,7 +78,7 @@ function(setup_rpackage_install_target rlib_target build_dir)
   install(CODE "file(WRITE \"${build_dir}/R-package/src/Makevars\" \"all:\")")
   install(CODE "file(WRITE \"${build_dir}/R-package/src/Makevars.win\" \"all:\")")
   set(XGB_DEPS_SCRIPT
-    "deps = setdiff(c('statar','data.table', 'magrittr', 'stringi'), rownames(installed.packages()));\
+    "deps = setdiff(c('data.table', 'magrittr', 'stringi'), rownames(installed.packages()));\
     if(length(deps)>0) install.packages(deps, repo = 'https://cloud.r-project.org/')")
   install(CODE "execute_process(COMMAND \"${LIBR_EXECUTABLE}\" \"-q\" \"-e\" \"${XGB_DEPS_SCRIPT}\")")
   install(CODE "execute_process(COMMAND \"${LIBR_EXECUTABLE}\" CMD INSTALL\
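
With the unused 'statar' package dropped, the generated bootstrap is equivalent to running this in R by hand (a sketch of what the install step executes):

    deps <- setdiff(c('data.table', 'magrittr', 'stringi'), rownames(installed.packages()))
    if (length(deps) > 0) install.packages(deps, repo = 'https://cloud.r-project.org/')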

@@ -516,7 +516,7 @@ class RegTree: public TreeModel<bst_float, RTreeNodeStat> {
    * \param out_contribs output vector to hold the contributions
    */
   inline void CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
                                            bst_float *out_contribs) const;
   /*!
    * \brief get next position of the tree given current pid
    * \param pid Current node id.
@@ -619,7 +619,7 @@ inline bst_float RegTree::FillNodeMeanValue(int nid) {
 }

 inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, unsigned root_id,
                                                   bst_float *out_contribs) const {
   CHECK_GT(this->node_mean_values.size(), 0U);
   // this follows the idea of http://blog.datadive.net/interpreting-random-forests/
   bst_float node_value;
@@ -647,16 +647,16 @@ inline void RegTree::CalculateContributionsApprox(const RegTree::FVec& feat, uns

 // extend our decision path with a fraction of one and zero extensions
 inline void ExtendPath(PathElement *unique_path, unsigned unique_depth,
                        bst_float zero_fraction, bst_float one_fraction, int feature_index) {
   unique_path[unique_depth].feature_index = feature_index;
   unique_path[unique_depth].zero_fraction = zero_fraction;
   unique_path[unique_depth].one_fraction = one_fraction;
-  unique_path[unique_depth].pweight = static_cast<bst_float>(unique_depth == 0 ? 1 : 0);
-  for (int i = unique_depth-1; i >= 0; i--) {
-    unique_path[i+1].pweight += one_fraction*unique_path[i].pweight*(i+1)
-                                / static_cast<bst_float>(unique_depth+1);
-    unique_path[i].pweight = zero_fraction*unique_path[i].pweight*(unique_depth-i)
-                             / static_cast<bst_float>(unique_depth+1);
+  unique_path[unique_depth].pweight = (unique_depth == 0 ? 1.0f : 0.0f);
+  for (int i = unique_depth - 1; i >= 0; i--) {
+    unique_path[i+1].pweight += one_fraction * unique_path[i].pweight * (i + 1)
+                                / static_cast<bst_float>(unique_depth + 1);
+    unique_path[i].pweight = zero_fraction * unique_path[i].pweight * (unique_depth - i)
+                             / static_cast<bst_float>(unique_depth + 1);
   }
 }

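ExtendPath keeps, for each path length, the total Shapley permutation weight of feature subsets of that size. One invariant worth knowing when reading the loop: with every zero_fraction and one_fraction equal to 1, the weights after the root element plus d feature extensions are uniform, matching C(d,i) * i!(d-i)!/(d+1)! = 1/(d+1). A small R mirror of the loop above (indices shifted by one for R; illustrative only, not the shipped implementation):

    extend_path <- function(pw, zero_fraction = 1, one_fraction = 1) {
      d <- length(pw)                  # unique_depth before this extension
      pw <- c(pw, if (d == 0) 1 else 0)
      if (d > 0) for (i in d:1) {      # R's pw[i] is the C code's pweight[i-1]
        pw[i + 1] <- pw[i + 1] + one_fraction * pw[i] * i / (d + 1)
        pw[i]     <- zero_fraction * pw[i] * (d - i + 1) / (d + 1)
      }
      pw
    }
    pw <- numeric(0)
    for (k in 1:4) pw <- extend_path(pw)   # root dummy element + 3 features
    pw                                     # 0.25 0.25 0.25 0.25 = 1/(d+1) with d = 3
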
@@ -666,16 +666,16 @@ inline void UnwindPath(PathElement *unique_path, unsigned unique_depth, unsigned
   const bst_float zero_fraction = unique_path[path_index].zero_fraction;
   bst_float next_one_portion = unique_path[unique_depth].pweight;

-  for (int i = unique_depth-1; i >= 0; --i) {
+  for (int i = unique_depth - 1; i >= 0; --i) {
     if (one_fraction != 0) {
       const bst_float tmp = unique_path[i].pweight;
-      unique_path[i].pweight = next_one_portion*(unique_depth+1)
-                               / static_cast<bst_float>((i+1)*one_fraction);
-      next_one_portion = tmp - unique_path[i].pweight*zero_fraction*(unique_depth-i)
-                         / static_cast<bst_float>(unique_depth+1);
+      unique_path[i].pweight = next_one_portion * (unique_depth + 1)
+                               / static_cast<bst_float>((i + 1) * one_fraction);
+      next_one_portion = tmp - unique_path[i].pweight * zero_fraction * (unique_depth - i)
+                         / static_cast<bst_float>(unique_depth + 1);
     } else {
-      unique_path[i].pweight = (unique_path[i].pweight*(unique_depth+1))
-                               / static_cast<bst_float>(zero_fraction*(unique_depth-i));
+      unique_path[i].pweight = (unique_path[i].pweight * (unique_depth + 1))
+                               / static_cast<bst_float>(zero_fraction * (unique_depth - i));
     }
   }

@@ -694,16 +694,16 @@ inline bst_float UnwoundPathSum(const PathElement *unique_path, unsigned unique_
   const bst_float zero_fraction = unique_path[path_index].zero_fraction;
   bst_float next_one_portion = unique_path[unique_depth].pweight;
   bst_float total = 0;
-  for (int i = unique_depth-1; i >= 0; --i) {
+  for (int i = unique_depth - 1; i >= 0; --i) {
     if (one_fraction != 0) {
-      const bst_float tmp = next_one_portion*(unique_depth+1)
-                            / static_cast<bst_float>((i+1)*one_fraction);
+      const bst_float tmp = next_one_portion * (unique_depth + 1)
+                            / static_cast<bst_float>((i + 1) * one_fraction);
       total += tmp;
-      next_one_portion = unique_path[i].pweight - tmp*zero_fraction*((unique_depth-i)
-                         / static_cast<bst_float>(unique_depth+1));
+      next_one_portion = unique_path[i].pweight - tmp * zero_fraction * ((unique_depth - i)
+                         / static_cast<bst_float>(unique_depth+1));
     } else {
-      total += (unique_path[i].pweight/zero_fraction)/((unique_depth-i)
-               / static_cast<bst_float>(unique_depth+1));
+      total += (unique_path[i].pweight / zero_fraction) / ((unique_depth - i)
+               / static_cast<bst_float>(unique_depth + 1));
     }
   }
   return total;
@@ -718,7 +718,8 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,

   // extend the unique path
   PathElement *unique_path = parent_unique_path + unique_depth;
-  if (unique_depth > 0) std::copy(parent_unique_path, parent_unique_path+unique_depth, unique_path);
+  if (unique_depth > 0) std::copy(parent_unique_path,
+                                  parent_unique_path + unique_depth, unique_path);
   ExtendPath(unique_path, unique_depth, parent_zero_fraction,
              parent_one_fraction, parent_feature_index);
   const unsigned split_index = node.split_index();
@@ -728,7 +729,7 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
   for (unsigned i = 1; i <= unique_depth; ++i) {
     const bst_float w = UnwoundPathSum(unique_path, unique_depth, i);
     const PathElement &el = unique_path[i];
-    phi[el.feature_index] += w*(el.one_fraction-el.zero_fraction)*node.leaf_value();
+    phi[el.feature_index] += w * (el.one_fraction - el.zero_fraction) * node.leaf_value();
   }

   // internal node
@@ -742,10 +743,11 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
   } else {
     hot_index = node.cright();
   }
-  const unsigned cold_index = (hot_index == node.cleft() ? node.cright() : node.cleft());
+  const unsigned cold_index = (static_cast<int>(hot_index) == node.cleft() ?
+                               node.cright() : node.cleft());
   const bst_float w = this->stat(node_index).sum_hess;
-  const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess/w;
-  const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess/w;
+  const bst_float hot_zero_fraction = this->stat(hot_index).sum_hess / w;
+  const bst_float cold_zero_fraction = this->stat(cold_index).sum_hess / w;
   bst_float incoming_zero_fraction = 1;
   bst_float incoming_one_fraction = 1;

@@ -753,19 +755,19 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
   // if so we undo that split so we can redo it for this node
   unsigned path_index = 0;
   for (; path_index <= unique_depth; ++path_index) {
-    if (unique_path[path_index].feature_index == split_index) break;
+    if (static_cast<unsigned>(unique_path[path_index].feature_index) == split_index) break;
   }
-  if (path_index != unique_depth+1) {
+  if (path_index != unique_depth + 1) {
     incoming_zero_fraction = unique_path[path_index].zero_fraction;
     incoming_one_fraction = unique_path[path_index].one_fraction;
     UnwindPath(unique_path, unique_depth, path_index);
     unique_depth -= 1;
   }

-  TreeShap(feat, phi, hot_index, unique_depth+1, unique_path,
+  TreeShap(feat, phi, hot_index, unique_depth + 1, unique_path,
            hot_zero_fraction*incoming_zero_fraction, incoming_one_fraction, split_index);

-  TreeShap(feat, phi, cold_index, unique_depth+1, unique_path,
+  TreeShap(feat, phi, cold_index, unique_depth + 1, unique_path,
            cold_zero_fraction*incoming_zero_fraction, 0, split_index);
   }
 }
@@ -773,21 +775,21 @@ inline void RegTree::TreeShap(const RegTree::FVec& feat, bst_float *phi,
 inline void RegTree::CalculateContributions(const RegTree::FVec& feat, unsigned root_id,
                                             bst_float *out_contribs) const {
   // find the expected value of the tree's predictions
-  bst_float base_value = 0.0;
-  bst_float total_cover = 0;
+  bst_float base_value = 0.0f;
+  bst_float total_cover = 0.0f;
   for (int i = 0; i < (*this).param.num_nodes; ++i) {
     const auto node = (*this)[i];
     if (node.is_leaf()) {
       const auto cover = this->stat(i).sum_hess;
-      base_value += cover*node.leaf_value();
+      base_value += cover * node.leaf_value();
       total_cover += cover;
     }
   }
   out_contribs[feat.size()] += base_value / total_cover;

   // Preallocate space for the unique path data
-  const int maxd = this->MaxDepth(root_id)+1;
-  PathElement *unique_path_data = new PathElement[(maxd*(maxd+1))/2];
+  const int maxd = this->MaxDepth(root_id) + 1;
+  PathElement *unique_path_data = new PathElement[(maxd * (maxd + 1)) / 2];

   TreeShap(feat, out_contribs, root_id, 0, unique_path_data, 1, 1, -1);
   delete[] unique_path_data;
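
CalculateContributions seeds the bias slot with the cover-weighted mean leaf value before recursing, so for gbtree the "BIAS" column of a predcontrib matrix is the same for every row: the expected margin over the training distribution. A quick hedged check (bst and the agaricus data as in the R examples above):

    contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
    range(contr[, "BIAS"])  # constant across rows, up to float precision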