% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.plot.shap.R
\name{xgb.plot.shap}
\alias{xgb.plot.shap}
\title{SHAP dependence plots}
\usage{
xgb.plot.shap(
  data,
  shap_contrib = NULL,
  features = NULL,
  top_n = 1,
  model = NULL,
  trees = NULL,
  target_class = NULL,
  approxcontrib = FALSE,
  subsample = NULL,
  n_col = 1,
  col = rgb(0, 0, 1, 0.2),
  pch = ".",
  discrete_n_uniq = 5,
  discrete_jitter = 0.01,
  ylab = "SHAP",
  plot_NA = TRUE,
  col_NA = rgb(0.7, 0, 1, 0.6),
  pch_NA = ".",
  pos_NA = 1.07,
  plot_loess = TRUE,
  col_loess = 2,
  span_loess = 0.5,
  which = c("1d", "2d"),
  plot = TRUE,
  ...
)
}
\arguments{
\item{data}{The data to explain as a \code{matrix} or \code{dgCMatrix}.}

\item{shap_contrib}{Matrix of SHAP contributions of \code{data}.
The default (\code{NULL}) computes it from \code{model} and \code{data}.}

\item{features}{Vector of column indices or feature names to plot.
When \code{NULL} (default), the \code{top_n} most important features are selected
by \code{\link[=xgb.importance]{xgb.importance()}}.}

\item{top_n}{How many of the most important features (<= 100) should be selected?
By default 1 for SHAP dependence and 10 for SHAP summary.
Only used when \code{features = NULL}.}

\item{model}{An \code{xgb.Booster} model. Only required when \code{shap_contrib = NULL} or
\code{features = NULL}.}

\item{trees}{Passed to \code{\link[=xgb.importance]{xgb.importance()}} when \code{features = NULL}.}

\item{target_class}{Only relevant for multiclass models. The default (\code{NULL})
averages the SHAP values over all classes. Pass a (0-based) class index
to show only SHAP values of that class.}

\item{approxcontrib}{Passed to \code{predict()} when \code{shap_contrib = NULL}.}

\item{subsample}{Fraction of data points randomly picked for plotting.
The default (\code{NULL}) will use up to 100k data points.}

\item{n_col}{Number of columns in a grid of plots.}

\item{col}{Color of the scatterplot markers.}

\item{pch}{Scatterplot marker.}

\item{discrete_n_uniq}{Maximal number of unique feature values to consider the
feature as discrete.}

\item{discrete_jitter}{Jitter amount added to the values of discrete features.}

\item{ylab}{The y-axis label in 1D plots.}

\item{plot_NA}{Should contributions of cases with missing values be plotted?
Default is \code{TRUE}.}

\item{col_NA}{Color of marker for missing value contributions.}

\item{pch_NA}{Marker type for \code{NA} values.}

\item{pos_NA}{Relative position of the x-location where \code{NA} values are shown:
\code{min(x) + (max(x) - min(x)) * pos_NA}.}

\item{plot_loess}{Should loess-smoothed curves be plotted? (Default is \code{TRUE}).
The smoothing is only done for features with more than 5 distinct values.}

\item{col_loess}{Color of loess curves.}

\item{span_loess}{The \code{span} parameter of \code{\link[stats:loess]{stats::loess()}}.}

\item{which}{Whether to do univariate or bivariate plotting. Currently, only "1d" is implemented.}

\item{plot}{Should the plot be drawn? (Default is \code{TRUE}).
If \code{FALSE}, only a list of matrices is returned.}

\item{...}{Other parameters passed to \code{\link[graphics:plot.default]{graphics::plot()}}.}
}
\value{
In addition to producing plots (when \code{plot = TRUE}), it silently returns a list of two matrices:
\itemize{
\item \code{data}: Feature value matrix.
\item \code{shap_contrib}: Corresponding SHAP value matrix.
}
}
\description{
Visualizes SHAP values against feature values to gain an impression of feature effects.
}
\details{
These scatterplots represent how SHAP feature contributions depend on feature values.
The similarity to partial dependence plots is that they also give an idea for how feature values
affect predictions. However, in partial dependence plots, we see marginal dependencies
of model prediction on feature value, while SHAP dependence plots display the estimated
contributions of a feature to the prediction for each individual case.

When \code{plot_loess = TRUE}, feature values are rounded to three significant digits and
weighted LOESS is computed and plotted, where the weights are the numbers of data points
at each rounded value.

Note: SHAP contributions are on the scale of the model margin.
E.g., for a logistic binomial objective, the margin is on log-odds scale.
Also, since SHAP stands for "SHapley Additive exPlanation" (model prediction = sum of SHAP
contributions for all features + bias), depending on the objective used, transforming SHAP
contributions for a feature from the marginal to the prediction space is not necessarily
a meaningful thing to do.
}
\examples{

data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")

## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)
nrounds <- 20

bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, agaricus.train$label),
  nrounds = nrounds,
  eta = 0.1,
  max_depth = 3,
  subsample = 0.5,
  objective = "binary:logistic",
  nthread = nthread,
  verbose = 0
)

xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")

contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
xgb.plot.shap(agaricus.test$data, contr, model = bst, top_n = 12, n_col = 3)

# Summary plot
xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12)

# Multiclass example - plots for each class separately:
nclass <- 3
x <- as.matrix(iris[, -5])
set.seed(123)
is.na(x[sample(nrow(x) * 4, 30)]) <- TRUE # introduce some missing values

mbst <- xgb.train(
  data = xgb.DMatrix(x, label = as.numeric(iris$Species) - 1),
  nrounds = nrounds,
  max_depth = 2,
  eta = 0.3,
  subsample = 0.5,
  nthread = nthread,
  objective = "multi:softprob",
  num_class = nclass,
  verbose = 0
)
trees0 <- seq(from = 0, by = nclass, length.out = nrounds)
col <- rgb(0, 0, 1, 0.5)
xgb.plot.shap(
  x,
  model = mbst,
  trees = trees0,
  target_class = 0,
  top_n = 4,
  n_col = 2,
  col = col,
  pch = 16,
  pch_NA = 17
)

xgb.plot.shap(
  x,
  model = mbst,
  trees = trees0 + 1,
  target_class = 1,
  top_n = 4,
  n_col = 2,
  col = col,
  pch = 16,
  pch_NA = 17
)

xgb.plot.shap(
  x,
  model = mbst,
  trees = trees0 + 2,
  target_class = 2,
  top_n = 4,
  n_col = 2,
  col = col,
  pch = 16,
  pch_NA = 17
)

# Summary plot
xgb.ggplot.shap.summary(x, model = mbst, target_class = 0, top_n = 4)

}
\references{
\enumerate{
\item Scott M. Lundberg, Su-In Lee, "A Unified Approach to Interpreting Model Predictions",
NIPS Proceedings 2017, \url{https://arxiv.org/abs/1705.07874}
\item Scott M. Lundberg, Su-In Lee, "Consistent feature attribution for tree ensembles",
\url{https://arxiv.org/abs/1706.06060}
}
}