add support of GLM model in importance plot function
This commit is contained in:
parent
a1c0ee0e66
commit
d04f7005de
@ -1,6 +1,6 @@
|
|||||||
#' Plot feature importance bar graph
|
#' Plot feature importance bar graph
|
||||||
#'
|
#'
|
||||||
#' Read a data.table containing feature importance details and plot it.
|
#' Read a data.table containing feature importance details and plot it (for both GLM and Trees).
|
||||||
#'
|
#'
|
||||||
#' @importFrom magrittr %>%
|
#' @importFrom magrittr %>%
|
||||||
#' @param importance_matrix a \code{data.table} returned by the \code{xgb.importance} function.
|
#' @param importance_matrix a \code{data.table} returned by the \code{xgb.importance} function.
|
||||||
@ -10,7 +10,7 @@
|
|||||||
#'
|
#'
|
||||||
#' @details
|
#' @details
|
||||||
#' The purpose of this function is to easily represent the importance of each feature of a model.
|
#' The purpose of this function is to easily represent the importance of each feature of a model.
|
||||||
#' The function return a ggplot graph, therefore each of its characteristic can be overriden (to customize it).
|
#' The function returns a ggplot graph, therefore each of its characteristics can be overridden (to customize it).
|
||||||
#' In particular you may want to override the title of the graph. To do so, add \code{+ ggtitle("A GRAPH NAME")} next to the value returned by this function.
|
#' In particular you may want to override the title of the graph. To do so, add \code{+ ggtitle("A GRAPH NAME")} next to the value returned by this function.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
@ -40,21 +40,29 @@ xgb.plot.importance <-
|
|||||||
stop("Ckmeans.1d.dp package is required for plotting the importance", call. = FALSE)
|
stop("Ckmeans.1d.dp package is required for plotting the importance", call. = FALSE)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(isTRUE(all.equal(colnames(importance_matrix), c("Feature", "Gain", "Cover", "Frequency")))){
|
||||||
|
y.axe.name <- "Gain"
|
||||||
|
} else if(isTRUE(all.equal(colnames(importance_matrix), c("Feature", "Weight")))){
|
||||||
|
y.axe.name <- "Weight"
|
||||||
|
} else {
|
||||||
|
stop("Importance matrix is not correct (column names issue)")
|
||||||
|
}
|
||||||
|
|
||||||
# To avoid issues in clustering when co-occurences are used
|
# To avoid issues in clustering when co-occurrences are used
|
||||||
importance_matrix <-
|
importance_matrix <-
|
||||||
importance_matrix[, .(Gain = sum(Gain)), by = Feature]
|
importance_matrix[, .(Gain.or.Weight = sum(get(y.axe.name))), by = Feature]
|
||||||
|
|
||||||
clusters <-
|
clusters <-
|
||||||
suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
|
suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain.or.Weight], numberOfClusters))
|
||||||
importance_matrix[,"Cluster":= clusters$cluster %>% as.character]
|
importance_matrix[,"Cluster":= clusters$cluster %>% as.character]
|
||||||
|
|
||||||
plot <-
|
plot <-
|
||||||
ggplot2::ggplot(
|
ggplot2::ggplot(
|
||||||
importance_matrix, ggplot2::aes(
|
importance_matrix, ggplot2::aes(
|
||||||
x = stats::reorder(Feature, Gain), y = Gain, width = 0.05
|
x = stats::reorder(Feature, Gain.or.Weight), y = Gain.or.Weight, width = 0.05
|
||||||
), environment = environment()
|
), environment = environment()
|
||||||
) + ggplot2::geom_bar(ggplot2::aes(fill = Cluster), stat = "identity", position =
|
) + ggplot2::geom_bar(ggplot2::aes(fill = Cluster), stat = "identity", position =
|
||||||
"identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab("Gain") + ggplot2::ggtitle("Feature importance") + ggplot2::theme(
|
"identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab(y.axe.name) + ggplot2::ggtitle("Feature importance") + ggplot2::theme(
|
||||||
plot.title = ggplot2::element_text(lineheight = .9, face = "bold"), panel.grid.major.y = ggplot2::element_blank()
|
plot.title = ggplot2::element_text(lineheight = .9, face = "bold"), panel.grid.major.y = ggplot2::element_blank()
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -66,6 +74,6 @@ xgb.plot.importance <-
|
|||||||
# They are mainly column names inferred by Data.table...
|
# They are mainly column names inferred by Data.table...
|
||||||
globalVariables(
|
globalVariables(
|
||||||
c(
|
c(
|
||||||
"Feature", "Gain", "Cluster", "ggplot", "aes", "geom_bar", "coord_flip", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text"
|
"Feature", "Gain.or.Weight", "Cluster", "ggplot", "aes", "geom_bar", "coord_flip", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "Gain.or.Weight"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@ -15,11 +15,11 @@ xgb.plot.importance(importance_matrix = NULL, numberOfClusters = c(1:10))
|
|||||||
A \code{ggplot2} bar graph representing each feature by a horizontal bar. Longer is the bar, more important is the feature. Features are classified by importance and clustered by importance. The group is represented through the color of the bar.
|
A \code{ggplot2} bar graph representing each feature by a horizontal bar. The longer the bar, the more important the feature. Features are classified by importance and clustered by importance. The group is represented through the color of the bar.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Read a data.table containing feature importance details and plot it.
|
Read a data.table containing feature importance details and plot it (for both GLM and Trees).
|
||||||
}
|
}
|
||||||
\details{
|
\details{
|
||||||
The purpose of this function is to easily represent the importance of each feature of a model.
|
The purpose of this function is to easily represent the importance of each feature of a model.
|
||||||
The function return a ggplot graph, therefore each of its characteristic can be overriden (to customize it).
|
The function returns a ggplot graph, therefore each of its characteristics can be overridden (to customize it).
|
||||||
In particular you may want to override the title of the graph. To do so, add \code{+ ggtitle("A GRAPH NAME")} next to the value returned by this function.
|
In particular you may want to override the title of the graph. To do so, add \code{+ ggtitle("A GRAPH NAME")} next to the value returned by this function.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user