Updates from 1.2.0 cran submission (#6077)
* update for 1.2.0 cran submission * recover cmakelists * fix unittest from the shap PR * trigger CI
This commit is contained in:
@@ -43,6 +43,7 @@ bst2 <- xgb.load('xgb.model')
|
||||
# Save as a stand-alone file (JSON); load it with xgb.load()
|
||||
xgb.save(bst, 'xgb.model.json')
|
||||
bst2 <- xgb.load('xgb.model.json')
|
||||
if (file.exists('xgb.model.json')) file.remove('xgb.model.json')
|
||||
|
||||
# Save as a raw byte vector; load it with xgb.load.raw()
|
||||
xgb_bytes <- xgb.save.raw(bst)
|
||||
@@ -58,5 +59,6 @@ saveRDS(obj, 'my_object.rds')
|
||||
obj2 <- readRDS('my_object.rds')
|
||||
# Re-construct xgb.Booster object from the bytes
|
||||
bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
|
||||
if (file.exists('my_object.rds')) file.remove('my_object.rds')
|
||||
|
||||
}
|
||||
|
||||
18
R-package/man/normalize.Rd
Normal file
18
R-package/man/normalize.Rd
Normal file
@@ -0,0 +1,18 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.ggplot.R
|
||||
\name{normalize}
|
||||
\alias{normalize}
|
||||
\title{Scale feature value to have mean 0, standard deviation 1}
|
||||
\usage{
|
||||
normalize(x)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{Numeric vector}
|
||||
}
|
||||
\value{
|
||||
Numeric vector with mean 0 and sd 1.
|
||||
}
|
||||
\description{
|
||||
This is used to compare multiple features on the same plot.
|
||||
Internal utility function.
|
||||
}
|
||||
27
R-package/man/prepare.ggplot.shap.data.Rd
Normal file
27
R-package/man/prepare.ggplot.shap.data.Rd
Normal file
@@ -0,0 +1,27 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.ggplot.R
|
||||
\name{prepare.ggplot.shap.data}
|
||||
\alias{prepare.ggplot.shap.data}
|
||||
\title{Combine and melt feature values and SHAP contributions for sample
|
||||
observations.}
|
||||
\usage{
|
||||
prepare.ggplot.shap.data(data_list, normalize = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{data_list}{List containing 'data' and 'shap_contrib' returned by
|
||||
\code{xgb.shap.data()}.}
|
||||
|
||||
\item{normalize}{Whether to standardize feature values to have mean 0 and
|
||||
standard deviation 1 (useful for comparing multiple features on the same
|
||||
plot). Default \code{FALSE}.}
|
||||
}
|
||||
\value{
|
||||
A data.table containing the observation ID, the feature name, the
|
||||
feature value (normalized if specified), and the SHAP contribution value.
|
||||
}
|
||||
\description{
|
||||
Conforms to data format required for ggplot functions.
|
||||
}
|
||||
\details{
|
||||
Internal utility function.
|
||||
}
|
||||
@@ -154,7 +154,7 @@ The cross-validation process is then repeated \code{nrounds} times, with each of
|
||||
|
||||
All observations are used for both training and validation.
|
||||
|
||||
Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation}
|
||||
Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29}
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
|
||||
@@ -131,6 +131,7 @@ bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50,
|
||||
xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none")
|
||||
contr <- predict(bst, agaricus.test$data, predcontrib = TRUE)
|
||||
xgb.plot.shap(agaricus.test$data, contr, model = bst, top_n = 12, n_col = 3)
|
||||
xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12) # Summary plot
|
||||
|
||||
# multiclass example - plots for each class separately:
|
||||
nclass <- 3
|
||||
@@ -149,6 +150,7 @@ xgb.plot.shap(x, model = mbst, trees = trees0 + 1, target_class = 1, top_n = 4,
|
||||
n_col = 2, col = col, pch = 16, pch_NA = 17)
|
||||
xgb.plot.shap(x, model = mbst, trees = trees0 + 2, target_class = 2, top_n = 4,
|
||||
n_col = 2, col = col, pch = 16, pch_NA = 17)
|
||||
xgb.ggplot.shap.summary(x, model = mbst, target_class = 0, top_n = 4) # Summary plot
|
||||
|
||||
}
|
||||
\references{
|
||||
|
||||
78
R-package/man/xgb.plot.shap.summary.Rd
Normal file
78
R-package/man/xgb.plot.shap.summary.Rd
Normal file
@@ -0,0 +1,78 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.ggplot.R, R/xgb.plot.shap.R
|
||||
\name{xgb.ggplot.shap.summary}
|
||||
\alias{xgb.ggplot.shap.summary}
|
||||
\alias{xgb.plot.shap.summary}
|
||||
\title{SHAP contribution dependency summary plot}
|
||||
\usage{
|
||||
xgb.ggplot.shap.summary(
|
||||
data,
|
||||
shap_contrib = NULL,
|
||||
features = NULL,
|
||||
top_n = 10,
|
||||
model = NULL,
|
||||
trees = NULL,
|
||||
target_class = NULL,
|
||||
approxcontrib = FALSE,
|
||||
subsample = NULL
|
||||
)
|
||||
|
||||
xgb.plot.shap.summary(
|
||||
data,
|
||||
shap_contrib = NULL,
|
||||
features = NULL,
|
||||
top_n = 10,
|
||||
model = NULL,
|
||||
trees = NULL,
|
||||
target_class = NULL,
|
||||
approxcontrib = FALSE,
|
||||
subsample = NULL
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{data as a \code{matrix} or \code{dgCMatrix}.}
|
||||
|
||||
\item{shap_contrib}{a matrix of SHAP contributions that was computed earlier for the above
|
||||
\code{data}. When it is NULL, it is computed internally using \code{model} and \code{data}.}
|
||||
|
||||
\item{features}{a vector of either column indices or of feature names to plot. When it is NULL,
|
||||
feature importance is calculated, and \code{top_n} high ranked features are taken.}
|
||||
|
||||
\item{top_n}{when \code{features} is NULL, the \code{top_n} (an integer in the range [1, 100]) most important features in a model are taken.}
|
||||
|
||||
\item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib}
|
||||
or \code{features} is missing.}
|
||||
|
||||
\item{trees}{passed to \code{\link{xgb.importance}} when \code{features = NULL}.}
|
||||
|
||||
\item{target_class}{is only relevant for multiclass models. When it is set to a 0-based class index,
|
||||
only SHAP contributions for that specific class are used.
|
||||
If it is not set, SHAP importances are averaged over all classes.}
|
||||
|
||||
\item{approxcontrib}{passed to \code{\link{predict.xgb.Booster}} when \code{shap_contrib = NULL}.}
|
||||
|
||||
\item{subsample}{a random fraction of data points to use for plotting. When it is NULL,
|
||||
it is set so that up to 100K data points are used.}
|
||||
}
|
||||
\value{
|
||||
A \code{ggplot2} object.
|
||||
}
|
||||
\description{
|
||||
Compare SHAP contributions of different features.
|
||||
}
|
||||
\details{
|
||||
A point plot (each point representing one sample from \code{data}) is
|
||||
produced for each feature, with the points plotted on the SHAP value axis.
|
||||
Each point (observation) is coloured based on its feature value. The plot
|
||||
hence allows us to see which features have a negative / positive contribution
|
||||
on the model prediction, and whether the contribution is different for larger
|
||||
or smaller values of the feature. We effectively try to replicate the
|
||||
\code{summary_plot} function from \url{https://github.com/slundberg/shap}.
|
||||
}
|
||||
\examples{
|
||||
# See \code{\link{xgb.plot.shap}}.
|
||||
}
|
||||
\seealso{
|
||||
\code{\link{xgb.plot.shap}}, \code{\link{xgb.ggplot.shap.summary}},
|
||||
\url{https://github.com/slundberg/shap}
|
||||
}
|
||||
29
R-package/man/xgb.shap.data.Rd
Normal file
29
R-package/man/xgb.shap.data.Rd
Normal file
@@ -0,0 +1,29 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.plot.shap.R
|
||||
\name{xgb.shap.data}
|
||||
\alias{xgb.shap.data}
|
||||
\title{Prepare data for SHAP plots. To be used in xgb.plot.shap, xgb.plot.shap.summary, etc.
|
||||
Internal utility function.}
|
||||
\usage{
|
||||
xgb.shap.data(
|
||||
data,
|
||||
shap_contrib = NULL,
|
||||
features = NULL,
|
||||
top_n = 1,
|
||||
model = NULL,
|
||||
trees = NULL,
|
||||
target_class = NULL,
|
||||
approxcontrib = FALSE,
|
||||
subsample = NULL,
|
||||
max_observations = 1e+05
|
||||
)
|
||||
}
|
||||
\value{
|
||||
A list containing: 'data', a matrix containing sample observations
|
||||
and their feature values; 'shap_contrib', a matrix containing the SHAP contribution
|
||||
values for these observations.
|
||||
}
|
||||
\description{
|
||||
Prepare data for SHAP plots. To be used in xgb.plot.shap, xgb.plot.shap.summary, etc.
|
||||
Internal utility function.
|
||||
}
|
||||
@@ -215,16 +215,16 @@ User may set one or several \code{eval_metric} parameters.
|
||||
Note that when using a customized metric, only this single metric can be used.
|
||||
The following is the list of built-in metrics for which XGBoost provides an optimized implementation:
|
||||
\itemize{
|
||||
\item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error}
|
||||
\item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood}
|
||||
\item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss}
|
||||
\item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error}
|
||||
\item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood}
|
||||
\item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html}
|
||||
\item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
||||
By default, it uses the 0.5 threshold for predicted values to define negative and positive instances.
|
||||
A different threshold (e.g., 0.) can be specified as "error@0."
|
||||
\item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}.
|
||||
\item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation.
|
||||
\item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve} for ranking evaluation.
|
||||
\item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation.
|
||||
\item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG}
|
||||
\item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG}
|
||||
}
|
||||
|
||||
The following callbacks are automatically created when certain parameters are set:
|
||||
|
||||
Reference in New Issue
Block a user