Rewrite tree plot function
Replace Mermaid by GraphViz
This commit is contained in:
parent
231a6e7aea
commit
635645c650
@ -9,17 +9,14 @@
|
||||
#' @importFrom data.table :=
|
||||
#' @importFrom data.table copy
|
||||
#' @importFrom magrittr %>%
|
||||
#' @importFrom magrittr not
|
||||
#' @importFrom magrittr add
|
||||
#' @importFrom stringr str_extract
|
||||
#' @importFrom stringr str_split
|
||||
#' @importFrom stringr str_extract
|
||||
#' @importFrom stringr str_trim
|
||||
#' @importFrom DiagrammeR create_nodes
|
||||
#' @importFrom DiagrammeR create_edges
|
||||
#' @importFrom DiagrammeR create_graph
|
||||
#' @importFrom DiagrammeR render_graph
|
||||
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
|
||||
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument).
|
||||
#' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
|
||||
#' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.
|
||||
#' @param CSSstyle a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.
|
||||
#' @param width the width of the diagram in pixels.
|
||||
#' @param height the height of the diagram in pixels.
|
||||
#'
|
||||
@ -36,7 +33,7 @@
|
||||
#' }
|
||||
#'
|
||||
#' Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
|
||||
#' It uses \href{https://github.com/knsv/mermaid/}{Mermaid} library for that purpose.
|
||||
#' It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
@ -53,12 +50,7 @@
|
||||
#' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
|
||||
#'
|
||||
#' @export
|
||||
#'
|
||||
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){
|
||||
|
||||
if (!(class(CSSstyle) %in% c("character", "NULL") && length(CSSstyle) <= 1)) {
|
||||
stop("style: Has to be a character vector of size 1.")
|
||||
}
|
||||
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, width = NULL, height = NULL){
|
||||
|
||||
if (!class(model) %in% c("xgb.Booster", "NULL")) {
|
||||
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
|
||||
@ -78,19 +70,38 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
|
||||
|
||||
allTrees[Feature != "Leaf" ,noPath := paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
|
||||
|
||||
if(is.null(CSSstyle)){
|
||||
CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
|
||||
}
|
||||
allTrees[, label:= paste0(Feature, "\nCover: ", Cover, "\nGain: ", Quality)]
|
||||
allTrees[, shape:= "rectangle"][Feature == "Leaf", shape:= "oval"]
|
||||
allTrees[, filledcolor:= "Beige"][Feature == "Leaf", filledcolor:= "Khaki"]
|
||||
|
||||
nodes <- create_nodes(nodes = allTrees[,ID],
|
||||
label = allTrees[,label],
|
||||
#type = c("lower", "lower", "upper", "upper"),
|
||||
style = "filled",
|
||||
color = "DimGray",
|
||||
fillcolor= allTrees[,filledcolor],
|
||||
shape = allTrees[,shape],
|
||||
data = allTrees[,Feature],
|
||||
fontname = "Helvetica"
|
||||
)
|
||||
|
||||
edges <- create_edges(from = allTrees[Feature != "Leaf", c(ID)] %>% rep(2),
|
||||
to = allTrees[Feature != "Leaf", c(Yes, No)],
|
||||
label = allTrees[Feature != "Leaf", paste("<",Split)] %>% c(rep("",nrow(allTrees[Feature != "Leaf"]))),
|
||||
color = "DimGray",
|
||||
arrowsize = "1.5",
|
||||
arrowhead = "vee",
|
||||
fontname = "Helvetica",
|
||||
rel = "leading_to")
|
||||
|
||||
yes <- allTrees[Feature != "Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
|
||||
|
||||
no <- allTrees[Feature != "Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
|
||||
|
||||
path <- allTrees[Feature != "Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
|
||||
DiagrammeR::mermaid(path, width, height)
|
||||
graph <- create_graph(nodes_df = nodes,
|
||||
edges_df = edges,
|
||||
graph_attrs = "rankdir = LR")
|
||||
|
||||
render_graph(graph, width = width, height = height)
|
||||
}
|
||||
|
||||
# Avoid error messages during CRAN check.
|
||||
# The reason is that these variables are never declared
|
||||
# They are mainly column names inferred by Data.table...
|
||||
globalVariables(c("Feature", "yesPath", "ID", "Cover", "Quality", "Split", "Yes", "Yes.Feature", "noPath", "No", "No.Feature", "."))
|
||||
globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", ".", "shape", "filledcolor"))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user