88 lines
4.3 KiB
R
88 lines
4.3 KiB
R
#' Plot a boosted tree model
|
|
#'
|
|
#' Read a tree model text dump.
|
|
#' Plotting only works for boosted tree model (not linear model).
|
|
#'
|
|
#' @importFrom data.table data.table
|
|
#' @importFrom data.table set
|
|
#' @importFrom data.table rbindlist
|
|
#' @importFrom data.table :=
|
|
#' @importFrom data.table copy
|
|
#' @importFrom magrittr %>%
|
|
#' @importFrom magrittr not
|
|
#' @importFrom magrittr add
|
|
#' @importFrom stringr str_extract
|
|
#' @importFrom stringr str_split
|
|
#' @importFrom stringr str_extract
|
|
#' @importFrom stringr str_trim
|
|
#' @importFrom DiagrammeR DiagrammeR
|
|
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
|
|
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = TRUE} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument).
|
|
#' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
|
|
#' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.
|
|
#' @param styles a \code{character} vector of size 1 storing a css style to customize the appearance of nodes. If \code{NULL}, a default style is used. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.
|
|
#'
|
|
#' @return A \code{DiagrammeR} of the model.
|
|
#'
|
|
#' @details
|
|
#'
|
|
#' The content of each node is organised that way:
|
|
#'
|
|
#' \itemize{
|
|
#' \item \code{feature} value ;
|
|
#' \item \code{cover}: the sum of second order gradient of training data classified to the leaf. For square loss, this simply corresponds to the number of instances in that branch. The deeper in the tree a node is, the lower this metric will be ;
|
|
#' \item \code{gain}: metric measuring the importance of the node in the model.
|
|
#' }
|
|
#'
|
|
#' Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
|
|
#' It uses \href{https://github.com/knsv/mermaid/}{Mermaid} library for that purpose.
|
|
#'
|
|
#' @examples
|
|
#' data(agaricus.train, package='xgboost')
|
|
#'
|
|
#' #Both dataset are list with two items, a sparse matrix and labels (labels = outcome column which will be learned).
|
|
#' #Each column of the sparse Matrix is a feature in one hot encoding format.
|
|
#' train <- agaricus.train
|
|
#'
|
|
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
|
#' eta = 1, nround = 2,objective = "binary:logistic")
|
|
#'
|
|
#' #agaricus.train$data@@Dimnames[[2]] represents the column names of the sparse matrix.
|
|
#' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
|
|
#'
|
|
#' @export
|
|
#'
|
|
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, styles = NULL){
  # Builds a Mermaid graph description from the tree table returned by
  # xgb.model.dt.tree() and hands it to DiagrammeR() for rendering.
  #
  # Arguments:
  #   feature_names  forwarded unchanged to xgb.model.dt.tree().
  #   filename_dump  forwarded to xgb.model.dt.tree() when `model` is NULL.
  #   model          NULL, or an object inheriting from "xgb.Booster"; when
  #                  given, it is dumped in memory with xgb.dump().
  #   n_first_tree   forwarded unchanged to xgb.model.dt.tree().
  #   styles         NULL (a default style is used) or a single character
  #                  string appended to the graph description.
  #
  # Returns the value of DiagrammeR() called on the graph description.
  # Stops with an error when `styles` or `model` has an unsupported type.

  # `styles` must be NULL or exactly one string. NULL is tested explicitly:
  # it has length 0, so a bare `length(styles) == 1` test would reject the
  # default value and make every call without `styles` fail.
  if (!(is.null(styles) || (is.character(styles) && length(styles) == 1))) {
    stop("style: Has to be a character vector of size 1.")
  }

  # inherits() keeps the condition scalar even when the object carries
  # several classes (class(x) %in% ... would yield a vector inside `if`).
  if (!is.null(model) && !inherits(model, "xgb.Booster")) {
    stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
  }

  if (is.null(model)) {
    # No model object: let xgb.model.dt.tree() work from the dump file path.
    allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree)
  } else {
    # Model object supplied: dump it with statistics and parse the dump text.
    text <- xgb.dump(model = model, with.stats = TRUE)
    allTrees <- xgb.model.dt.tree(feature_names = feature_names, text = text, n_first_tree = n_first_tree)
  }

  # Edge from each non-leaf node to its "Yes" child; the source node label
  # carries the feature, cover and gain (Quality column).
  allTrees[Feature != "Leaf", yesPath := paste0(ID, "(", Feature, "<br/>Cover: ", Cover, "<br/>Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]")]

  # Edge from each non-leaf node to its "No" child.
  allTrees[Feature != "Leaf", noPath := paste0(ID, "(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]")]

  # Default node styling, used only when the caller did not provide one.
  if (is.null(styles)) {
    styles <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
  }

  # Class statements: "Yes" targets get greenNode, "No" targets get redNode.
  yes <- allTrees[Feature != "Leaf", c(Yes)] %>%
    paste(collapse = ",") %>%
    paste0("class ", ., " greenNode")

  no <- allTrees[Feature != "Leaf", c(No)] %>%
    paste(collapse = ",") %>%
    paste0("class ", ., " redNode")

  # Full graph description: header, sorted edges, then style and class
  # statements, all separated by ";".
  path <- allTrees[Feature != "Leaf", c(yesPath, noPath)] %>%
    .[order(.)] %>%
    paste(sep = "", collapse = ";") %>%
    paste("graph LR", ., collapse = "", sep = ";") %>%
    paste(styles, yes, no, sep = ";")

  DiagrammeR(path)
}
|