% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.importance.R
\name{xgb.importance}
\alias{xgb.importance}
\title{Feature importance}
\usage{
xgb.importance(
  model = NULL,
  feature_names = getinfo(model, "feature_name"),
  trees = NULL,
  data = NULL,
  label = NULL,
  target = NULL
)
}
\arguments{
\item{model}{Object of class \code{xgb.Booster}.}

\item{feature_names}{Character vector used to overwrite the feature names
of the model. The default is \code{NULL} (use original feature names).}

\item{trees}{An integer vector of tree indices that should be included
in the importance calculation (only for the "gbtree" booster).
The default (\code{NULL}) parses all trees.
This can be useful, e.g., in multiclass classification to get feature importances
for each class separately. \emph{Important}: the tree index in XGBoost models
is zero-based (e.g., use \code{trees = 0:4} for the first five trees).}

\item{data}{Deprecated.}

\item{label}{Deprecated.}

\item{target}{Deprecated.}
}
\value{
A \code{data.table} with the following columns:

For a tree model:
\itemize{
\item \code{Features}: Names of the features used in the model.
\item \code{Gain}: Fractional contribution of each feature to the model, based on
the total gain of this feature's splits. A higher percentage means higher importance.
\item \code{Cover}: Metric of the number of observations related to this feature.
\item \code{Frequency}: Percentage of times a feature has been used in trees.
}

For a linear model:
\itemize{
\item \code{Features}: Names of the features used in the model.
\item \code{Weight}: Linear coefficient of this feature.
\item \code{Class}: Class label (only for multiclass models).
}

If \code{feature_names} is not provided and \code{model} does not have \code{feature_names},
the feature index will be used instead. Because the index is extracted from the model dump
(based on C++ code), it starts at 0 (as in C/C++ or Python) instead of 1 (as is usual in R).
}
\description{
Creates a \code{data.table} of feature importances.
}
\details{
This function works for both linear and tree models.

For linear models, the importance is the absolute magnitude of the linear coefficients.
To obtain a meaningful ranking by importance, the features need to
be on the same scale (which is also recommended when using L1 or L2 regularization).
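
As a minimal sketch (assuming a numeric feature matrix \code{x} and a label
vector \code{y}, neither of which appears elsewhere on this page), features
can be standardized before training a linear booster:

\preformatted{x_scaled <- scale(x)  # center and scale each column
dtrain <- xgb.DMatrix(x_scaled, label = y)
}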
}
\examples{

# binomial classification using "gbtree":
data(agaricus.train, package = "xgboost")

bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  max_depth = 2,
  eta = 1,
  nthread = 2,
  nrounds = 2,
  objective = "binary:logistic"
)

xgb.importance(model = bst)
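
# The "Gain", "Cover" and "Frequency" columns are normalized fractions, so
# each should sum to roughly 1 (a quick check; 'bst' is the model above):
imp <- xgb.importance(model = bst)
sum(imp$Gain)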

# binomial classification using "gblinear":
bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  booster = "gblinear",
  eta = 0.3,
  nthread = 1,
  nrounds = 20,
  objective = "binary:logistic"
)

xgb.importance(model = bst)
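
# For "gblinear", importance is the absolute magnitude of the coefficients
# (see Details), so a ranking can be obtained by sorting on |Weight|
# (a sketch using data.table syntax):
imp <- xgb.importance(model = bst)
imp[order(-abs(Weight))]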

# multiclass classification using "gbtree":
nclass <- 3
nrounds <- 10
mbst <- xgb.train(
  data = xgb.DMatrix(
    as.matrix(iris[, -5]),
    label = as.numeric(iris$Species) - 1
  ),
  max_depth = 3,
  eta = 0.2,
  nthread = 2,
  nrounds = nrounds,
  objective = "multi:softprob",
  num_class = nclass
)

# all classes clumped together:
xgb.importance(model = mbst)

# inspect importances separately for each class:
xgb.importance(
  model = mbst, trees = seq(from = 0, by = nclass, length.out = nrounds)
)
xgb.importance(
  model = mbst, trees = seq(from = 1, by = nclass, length.out = nrounds)
)
xgb.importance(
  model = mbst, trees = seq(from = 2, by = nclass, length.out = nrounds)
)
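
# The same three tables in one loop: with "multi:softprob", each boosting
# round adds one tree per class, so the trees of class k have indices
# k, k + nclass, k + 2 * nclass, ... (a sketch):
importances_by_class <- lapply(seq_len(nclass) - 1, function(k) {
  xgb.importance(
    model = mbst, trees = seq(from = k, by = nclass, length.out = nrounds)
  )
})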

# multiclass classification using "gblinear":
mbst <- xgb.train(
  data = xgb.DMatrix(
    scale(as.matrix(iris[, -5])),
    label = as.numeric(iris$Species) - 1
  ),
  booster = "gblinear",
  eta = 0.2,
  nthread = 1,
  nrounds = 15,
  objective = "multi:softprob",
  num_class = nclass
)

xgb.importance(model = mbst)
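
# For multiclass "gblinear" models the result has a "Class" column, so the
# coefficients of a single class can be pulled out by filtering on it
# (a sketch; class labels here are 0-based):
imp_m <- xgb.importance(model = mbst)
imp_m[Class == 0]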
}