* update for 1.2.0 cran submission * recover cmakelists * fix unittest from the shap PR * trigger CI
79 lines
2.6 KiB
R
79 lines
2.6 KiB
R
% Generated by roxygen2: do not edit by hand
|
|
% Please edit documentation in R/xgb.ggplot.R, R/xgb.plot.shap.R
|
|
\name{xgb.ggplot.shap.summary}
|
|
\alias{xgb.ggplot.shap.summary}
|
|
\alias{xgb.plot.shap.summary}
|
|
\title{SHAP contribution dependency summary plot}
|
|
\usage{
|
|
xgb.ggplot.shap.summary(
|
|
data,
|
|
shap_contrib = NULL,
|
|
features = NULL,
|
|
top_n = 10,
|
|
model = NULL,
|
|
trees = NULL,
|
|
target_class = NULL,
|
|
approxcontrib = FALSE,
|
|
subsample = NULL
|
|
)
|
|
|
|
xgb.plot.shap.summary(
|
|
data,
|
|
shap_contrib = NULL,
|
|
features = NULL,
|
|
top_n = 10,
|
|
model = NULL,
|
|
trees = NULL,
|
|
target_class = NULL,
|
|
approxcontrib = FALSE,
|
|
subsample = NULL
|
|
)
|
|
}
|
|
\arguments{
|
|
\item{data}{data as a \code{matrix} or \code{dgCMatrix}.}
|
|
|
|
\item{shap_contrib}{a matrix of SHAP contributions that was computed earlier for the above
|
|
\code{data}. When it is NULL, it is computed internally using \code{model} and \code{data}.}
|
|
|
|
\item{features}{a vector of either column indices or of feature names to plot. When it is NULL,
|
|
feature importance is calculated, and \code{top_n} high ranked features are taken.}
|
|
|
|
\item{top_n}{when \code{features} is NULL, the \code{top_n} most important features in the model are taken; \code{top_n} must be in the range [1, 100].}
|
|
|
|
\item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib}
|
|
or \code{features} is missing.}
|
|
|
|
\item{trees}{passed to \code{\link{xgb.importance}} when \code{features = NULL}.}
|
|
|
|
\item{target_class}{is only relevant for multiclass models. When it is set to a 0-based class index,
|
|
only SHAP contributions for that specific class are used.
|
|
If it is not set, SHAP importances are averaged over all classes.}
|
|
|
|
\item{approxcontrib}{passed to \code{\link{predict.xgb.Booster}} when \code{shap_contrib = NULL}.}
|
|
|
|
\item{subsample}{a random fraction of data points to use for plotting. When it is NULL,
|
|
it is set so that up to 100K data points are used.}
|
|
}
|
|
\value{
|
|
A \code{ggplot2} object.
|
|
}
|
|
\description{
|
|
Compare SHAP contributions of different features.
|
|
}
|
|
\details{
|
|
A point plot (each point representing one sample from \code{data}) is
|
|
produced for each feature, with the points plotted on the SHAP value axis.
|
|
Each point (observation) is coloured based on its feature value. The plot
|
|
hence allows us to see which features have a negative / positive contribution
|
|
to the model prediction, and whether the contribution is different for larger
|
|
or smaller values of the feature. We effectively try to replicate the
|
|
\code{summary_plot} function from \url{https://github.com/slundberg/shap}.
|
|
}
|
|
\examples{
|
|
# See the examples in ?xgb.plot.shap.
|
|
}
|
|
\seealso{
|
|
\code{\link{xgb.plot.shap}}, \code{\link{xgb.ggplot.shap.summary}},
|
|
\url{https://github.com/slundberg/shap}
|
|
}
|