xgboost/R-package/man/xgb.plot.shap.summary.Rd
2024-09-02 19:44:12 +08:00

83 lines
2.6 KiB
R

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.ggplot.R, R/xgb.plot.shap.R
\name{xgb.ggplot.shap.summary}
\alias{xgb.ggplot.shap.summary}
\alias{xgb.plot.shap.summary}
\title{SHAP summary plot}
\usage{
xgb.ggplot.shap.summary(
data,
shap_contrib = NULL,
features = NULL,
top_n = 10,
model = NULL,
trees = NULL,
target_class = NULL,
approxcontrib = FALSE,
subsample = NULL
)
xgb.plot.shap.summary(
data,
shap_contrib = NULL,
features = NULL,
top_n = 10,
model = NULL,
trees = NULL,
target_class = NULL,
approxcontrib = FALSE,
subsample = NULL
)
}
\arguments{
\item{data}{The data to explain as a \code{matrix}, \code{dgCMatrix}, or \code{data.frame}.}
\item{shap_contrib}{Matrix of SHAP contributions of \code{data}.
The default (\code{NULL}) computes it from \code{model} and \code{data}.}
\item{features}{Vector of column indices or feature names to plot. When \code{NULL}
(default), the \code{top_n} most important features are selected by \code{\link[=xgb.importance]{xgb.importance()}}.}
\item{top_n}{How many of the most important features (<= 100) should be selected?
By default 1 for SHAP dependence and 10 for SHAP summary.
Only used when \code{features = NULL}.}
\item{model}{An \code{xgb.Booster} model. Only required when \code{shap_contrib = NULL} or
\code{features = NULL}.}
\item{trees}{Passed to \code{\link[=xgb.importance]{xgb.importance()}} when \code{features = NULL}.}
\item{target_class}{Only relevant for multiclass models. The default (\code{NULL})
averages the SHAP values over all classes. Pass a (0-based) class index
to show only SHAP values of that class.}
\item{approxcontrib}{Passed to \code{predict()} when \code{shap_contrib = NULL}.}
\item{subsample}{Fraction of data points randomly picked for plotting.
The default (\code{NULL}) will use up to 100k data points.}
}
\value{
A \code{ggplot2} object.
}
\description{
Visualizes SHAP contributions of different features.
}
\details{
A point plot (each point representing one observation from \code{data}) is
produced for each feature, with the points plotted on the SHAP value axis.
Each point (observation) is coloured based on its feature value.
The plot shows which features contribute negatively or positively
to the model prediction, and whether the contribution differs for larger
or smaller values of the feature. Inspired by the summary plot of
\url{https://github.com/shap/shap}.
}
\examples{
# See examples in xgb.plot.shap()
}
\seealso{
\code{\link[=xgb.plot.shap]{xgb.plot.shap()}}, \code{\link[=xgb.ggplot.shap.summary]{xgb.ggplot.shap.summary()}},
and the Python library \url{https://github.com/shap/shap}.
}