Improve description wording

2015-11-27 17:34:26 +01:00
parent 5169d08735
commit 2fc9dcc549
1 changed files with 15 additions and 14 deletions
--- a/R-package/R/xgb.plot.multi.trees.R
+++ b/R-package/R/xgb.plot.multi.trees.R
@@ -1,32 +1,33 @@
-library(stringr)
-library(data.table)
-library(xgboost)
-
-#' Project all trees on one and plot it
-#'
-#' Provide a way to display on one tree all trees of the model.
+#' Project all trees on one tree and plot it
+#' 
+#' Visualization to view the ensemble of trees as a single collective unit.
 #'
 #' @importFrom data.table data.table
 #' @importFrom data.table rbindlist
 #' @importFrom data.table setnames
 #' @importFrom data.table :=
 #' @importFrom magrittr %>%
+#' 
 #' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).
 #' @param model dump generated by the \code{xgb.train} function. Avoid the creation of a dump file.
-#'
+#' @param features.keep number of features to keep in each position of the multi tree.
+#' @param plot.width width in pixels of the graph to produce
+#' @param plot.height height in pixels of the graph to produce
+#' 
 #' @return Two graphs showing the distribution of the model deepness.
 #' 
 #' @details
 #' 
 #' This function tries to capture the complexity of gradient boosted tree ensembles in a cohesive way. 
 #' The goal is to improve the interpretability of the model generally seen as black box.
-#' The function is dedicated to boosting applied to trees only. It won't work on GLM.
+#' The function is dedicated to boosting applied to decision trees only.
 #' 
 #' The purpose is to move from an ensemble of trees to a single tree only.
-#' It leverages the fact that the shape of a binary tree is only defined by its deepness.
-#' The second fact which is leverage is that all trees in a boosting model tend to share the features they use.
+#' It takes advantage of the fact that the shape of a binary tree is only defined by its depth.
+#' Therefore in a boosting model, all trees have the same shape. 
+#' Moreover, the trees tend to reuse the same features.
 #' 
-#' The function will project each trees on one tree, and keep the \code{keepN} first feature for each position.
+#' The function will project each tree onto one tree, and keep the first \code{features.keep} features for each position.
 #' This function is inspired from this blog post:
 #' \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
 #'
@@ -41,7 +42,7 @@ library(xgboost)
 #' print(p)
 #'
 #' @export
-xgb.plot.multi.trees <- function(model, names, keepN = 5, plot.width = NULL, plot.height = NULL){
+xgb.plot.multi.trees <- function(model, names, features.keep = 5, plot.width = NULL, plot.height = NULL){
  tree.matrix <- xgb.model.dt.tree(names, model = model)
  
  # first number of the path represents the tree, then the following numbers are related to the path to follow
@@ -71,7 +72,7 @@ xgb.plot.multi.trees <- function(model, names, keepN = 5, plot.width = NULL, plo
  
  tree.matrix[,`:=`(abs.node.position=remove.tree(abs.node.position), Yes=remove.tree(Yes), No=remove.tree(No))]
  
-  nodes.dt <- tree.matrix[,.(Quality = sum(Quality)),by = .(abs.node.position, Feature)][,.(Text =paste0(Feature[1:min(length(Feature), keepN)], " (", Quality[1:min(length(Quality), keepN)], ")") %>% paste0(collapse = "\n")), by=abs.node.position]
+  nodes.dt <- tree.matrix[,.(Quality = sum(Quality)),by = .(abs.node.position, Feature)][,.(Text =paste0(Feature[1:min(length(Feature), features.keep)], " (", Quality[1:min(length(Quality), features.keep)], ")") %>% paste0(collapse = "\n")), by=abs.node.position]
  edges.dt <- tree.matrix[Feature != "Leaf",.(abs.node.position, Yes)] %>% list(tree.matrix[Feature != "Leaf",.(abs.node.position, No)]) %>% rbindlist() %>% setnames(c("From", "To")) %>% .[,.N,.(From, To)] %>% .[,N:=NULL]
  
  nodes <- DiagrammeR::create_nodes(nodes = nodes.dt[,abs.node.position],