Rewrite tree plot function

Replace Mermaid by GraphViz
This commit is contained in:
unknown 2015-11-07 21:00:02 +01:00
parent 231a6e7aea
commit 635645c650

View File

@ -9,17 +9,14 @@
#' @importFrom data.table :=
#' @importFrom data.table copy
#' @importFrom magrittr %>%
#' @importFrom magrittr not
#' @importFrom magrittr add
#' @importFrom stringr str_extract
#' @importFrom stringr str_split
#' @importFrom stringr str_extract
#' @importFrom stringr str_trim
#' @importFrom DiagrammeR create_nodes
#' @importFrom DiagrammeR create_edges
#' @importFrom DiagrammeR create_graph
#' @importFrom DiagrammeR render_graph
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument).
#' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
#' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.
#' @param CSSstyle a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.
#' @param width the width of the diagram in pixels.
#' @param height the height of the diagram in pixels.
#'
@ -36,7 +33,7 @@
#' }
#'
#' Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
#' It uses \href{https://github.com/knsv/mermaid/}{Mermaid} library for that purpose.
#' It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
@ -53,12 +50,7 @@
#' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], model = bst)
#'
#' @export
#'
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){
if (!(class(CSSstyle) %in% c("character", "NULL") && length(CSSstyle) <= 1)) {
stop("style: Has to be a character vector of size 1.")
}
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, width = NULL, height = NULL){
if (!class(model) %in% c("xgb.Booster", "NULL")) {
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
@ -78,19 +70,38 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
allTrees[Feature != "Leaf" ,noPath := paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
if(is.null(CSSstyle)){
CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
}
allTrees[, label:= paste0(Feature, "\nCover: ", Cover, "\nGain: ", Quality)]
allTrees[, shape:= "rectangle"][Feature == "Leaf", shape:= "oval"]
allTrees[, filledcolor:= "Beige"][Feature == "Leaf", filledcolor:= "Khaki"]
nodes <- create_nodes(nodes = allTrees[,ID],
label = allTrees[,label],
#type = c("lower", "lower", "upper", "upper"),
style = "filled",
color = "DimGray",
fillcolor= allTrees[,filledcolor],
shape = allTrees[,shape],
data = allTrees[,Feature],
fontname = "Helvetica"
)
edges <- create_edges(from = allTrees[Feature != "Leaf", c(ID)] %>% rep(2),
to = allTrees[Feature != "Leaf", c(Yes, No)],
label = allTrees[Feature != "Leaf", paste("<",Split)] %>% c(rep("",nrow(allTrees[Feature != "Leaf"]))),
color = "DimGray",
arrowsize = "1.5",
arrowhead = "vee",
fontname = "Helvetica",
rel = "leading_to")
yes <- allTrees[Feature != "Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
no <- allTrees[Feature != "Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
path <- allTrees[Feature != "Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
DiagrammeR::mermaid(path, width, height)
graph <- create_graph(nodes_df = nodes,
edges_df = edges,
graph_attrs = "rankdir = LR")
render_graph(graph, width = width, height = height)
}
# Avoid error messages during CRAN check.
# The reason is that these variables are never declared
# They are mainly column names inferred by Data.table...
globalVariables(c("Feature", "yesPath", "ID", "Cover", "Quality", "Split", "Yes", "Yes.Feature", "noPath", "No", "No.Feature", "."))
globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", ".", "shape", "filledcolor"))