[R] xgb.plot.tree fixes (#1939)
* [R] a few fixes and improvements to xgb.plot.tree * [R] deprecate n_first_tree replace with trees; fix types in xgb.model.dt.tree
This commit is contained in:
committed by
Tianqi Chen
parent
d23ea5ca7d
commit
d7406e07f3
@@ -2,8 +2,8 @@
|
||||
#'
|
||||
#' Visualization of the ensemble of trees as a single collective unit.
|
||||
#'
|
||||
#' @param model dump generated by the \code{xgb.train} function.
|
||||
#' @param feature_names names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
|
||||
#' @param model produced by the \code{xgb.train} function.
|
||||
#' @param feature_names names of each feature as a \code{character} vector.
|
||||
#' @param features_keep number of features to keep in each position of the multi trees.
|
||||
#' @param plot_width width in pixels of the graph to produce
|
||||
#' @param plot_height height in pixels of the graph to produce
|
||||
@@ -13,21 +13,19 @@
|
||||
#'
|
||||
#' @details
|
||||
#'
|
||||
#' This function tries to capture the complexity of gradient boosted tree ensemble
|
||||
#' in a cohesive way.
|
||||
#' This function tries to capture the complexity of a gradient boosted tree model
|
||||
#' in a cohesive way by compressing an ensemble of trees into a single tree-graph representation.
|
||||
#' The goal is to improve the interpretability of a model generally seen as black box.
|
||||
#'
|
||||
#' The goal is to improve the interpretability of the model generally seen as black box.
|
||||
#' The function is dedicated to boosting applied to decision trees only.
|
||||
#'
|
||||
#' The purpose is to move from an ensemble of trees to a single tree only.
|
||||
#' Note: this function is applicable to tree booster-based models only.
|
||||
#'
|
||||
#' It takes advantage of the fact that the shape of a binary tree is only defined by
|
||||
#' its deepness (therefore in a boosting model, all trees have the same shape).
|
||||
#' its depth (therefore, in a boosting model, all trees have similar shape).
|
||||
#'
|
||||
#' Moreover, the trees tend to reuse the same features.
|
||||
#'
|
||||
#' The function will project each tree on one, and keep for each position the
|
||||
#' \code{features_keep} first features (based on Gain per feature measure).
|
||||
#' The function projects each tree onto one, and keeps for each position the
|
||||
#' \code{features_keep} first features (based on the Gain per feature measure).
|
||||
#'
|
||||
#' This function is inspired by this blog post:
|
||||
#' \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
|
||||
@@ -70,39 +68,61 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
|
||||
tree.matrix[!is.na(No),No:= paste0(abs.node.position, "_1")]
|
||||
|
||||
|
||||
|
||||
remove.tree <- . %>% stri_replace_first_regex(pattern = "^\\d+-", replacement = "")
|
||||
|
||||
tree.matrix[,`:=`(abs.node.position=remove.tree(abs.node.position), Yes=remove.tree(Yes), No=remove.tree(No))]
|
||||
tree.matrix[,`:=`(abs.node.position = remove.tree(abs.node.position),
|
||||
Yes = remove.tree(Yes),
|
||||
No = remove.tree(No))]
|
||||
|
||||
nodes.dt <- tree.matrix[,.(Quality = sum(Quality)),by = .(abs.node.position, Feature)][,.(Text =paste0(Feature[1:min(length(Feature), features_keep)], " (", Quality[1:min(length(Quality), features_keep)], ")") %>% paste0(collapse = "\n")), by=abs.node.position]
|
||||
edges.dt <- tree.matrix[Feature != "Leaf",.(abs.node.position, Yes)] %>% list(tree.matrix[Feature != "Leaf",.(abs.node.position, No)]) %>% rbindlist() %>% setnames(c("From", "To")) %>% .[,.N,.(From, To)] %>% .[,N:=NULL]
|
||||
nodes.dt <- tree.matrix[
|
||||
, .(Quality = sum(Quality))
|
||||
, by = .(abs.node.position, Feature)
|
||||
][, .(Text = paste0(Feature[1:min(length(Feature), features_keep)],
|
||||
" (",
|
||||
format(Quality[1:min(length(Quality), features_keep)], digits=5),
|
||||
")") %>%
|
||||
paste0(collapse = "\n"))
|
||||
, by = abs.node.position]
|
||||
|
||||
nodes <- DiagrammeR::create_node_df(n = nrow(nodes.dt),
|
||||
label = nodes.dt[,Text],
|
||||
style = "filled",
|
||||
color = "DimGray",
|
||||
fillcolor= "Beige",
|
||||
shape = "oval",
|
||||
fontname = "Helvetica"
|
||||
edges.dt <- tree.matrix[Feature != "Leaf", .(abs.node.position, Yes)] %>%
|
||||
list(tree.matrix[Feature != "Leaf",.(abs.node.position, No)]) %>%
|
||||
rbindlist() %>%
|
||||
setnames(c("From", "To")) %>%
|
||||
.[, .N, .(From, To)] %>%
|
||||
.[, N:=NULL]
|
||||
|
||||
nodes <- DiagrammeR::create_node_df(
|
||||
n = nrow(nodes.dt),
|
||||
label = nodes.dt[,Text]
|
||||
)
|
||||
|
||||
edges <- DiagrammeR::create_edge_df(from = match(edges.dt[,From], nodes.dt[,abs.node.position]),
|
||||
to = match(edges.dt[,To], nodes.dt[,abs.node.position]),
|
||||
color = "DimGray",
|
||||
arrowsize = "1.5",
|
||||
arrowhead = "vee",
|
||||
fontname = "Helvetica",
|
||||
rel = "leading_to")
|
||||
edges <- DiagrammeR::create_edge_df(
|
||||
from = match(edges.dt[,From], nodes.dt[,abs.node.position]),
|
||||
to = match(edges.dt[,To], nodes.dt[,abs.node.position]),
|
||||
rel = "leading_to")
|
||||
|
||||
graph <- DiagrammeR::create_graph(nodes_df = nodes,
|
||||
edges_df = edges)
|
||||
graph <- DiagrammeR::create_graph(
|
||||
nodes_df = nodes,
|
||||
edges_df = edges,
|
||||
attr_theme = NULL
|
||||
) %>%
|
||||
DiagrammeR::add_global_graph_attrs(
|
||||
attr_type = "graph",
|
||||
attr = c("layout", "rankdir"),
|
||||
value = c("dot", "LR")
|
||||
) %>%
|
||||
DiagrammeR::add_global_graph_attrs(
|
||||
attr_type = "node",
|
||||
attr = c("color", "fillcolor", "style", "shape", "fontname"),
|
||||
value = c("DimGray", "beige", "filled", "rectangle", "Helvetica")
|
||||
) %>%
|
||||
DiagrammeR::add_global_graph_attrs(
|
||||
attr_type = "edge",
|
||||
attr = c("color", "arrowsize", "arrowhead", "fontname"),
|
||||
value = c("DimGray", "1.5", "vee", "Helvetica"))
|
||||
|
||||
DiagrammeR::render_graph(graph, width = plot_width, height = plot_height)
|
||||
}
|
||||
|
||||
globalVariables(
|
||||
c(
|
||||
".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
|
||||
)
|
||||
)
|
||||
globalVariables(c(".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos",
|
||||
"ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"))
|
||||
|
||||
Reference in New Issue
Block a user