add exclusion of global variables + generate Roxygen doc

pommedeterresautee 2015-11-27 17:58:50 +01:00
parent 3d50a6a425
commit 68b666d7e5
4 changed files with 74 additions and 7 deletions

NAMESPACE

@@ -12,6 +12,7 @@ export(xgb.load)
export(xgb.model.dt.tree)
export(xgb.plot.deepness)
export(xgb.plot.importance)
+export(xgb.plot.multi.trees)
export(xgb.plot.tree)
export(xgb.save)
export(xgb.save.raw)
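
The added export is generated rather than hand-written: roxygen2 derives NAMESPACE directives from @export tags in the R sources. A minimal sketch of the pattern behind this hunk (the signature matches the usage section further down; the body is elided, and devtools::document() is just one common way to run roxygen2):

#' Project all trees on one tree and plot it
#'
#' @export
xgb.plot.multi.trees <- function(model, names, features.keep = 5,
                                 plot.width = NULL, plot.height = NULL) {
  # ... build the multi-tree graph and render it with DiagrammeR ...
}

Re-running roxygen2, e.g. via devtools::document(), regenerates both NAMESPACE and the man/*.Rd files shown below.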

R/xgb.plot.multi.trees.R

@@ -1,6 +1,6 @@
#' Project all trees on one tree and plot it
#'
-#' visualization to view the ensemble of trees as a single collective unit.
+#' Visualization of the ensemble of trees as a single collective unit.
#'
#' @importFrom data.table data.table
#' @importFrom data.table rbindlist
@@ -18,16 +18,20 @@
#'
#' @details
#'
-#' This function tries to capture the complexity of gradient boosted tree ensembles in a cohesive way.
+#' This function tries to capture the complexity of gradient boosted tree ensembles
+#' in a cohesive way.
#' The goal is to improve the interpretability of a model generally seen as a black box.
#' The function is dedicated to boosting applied to decision trees only.
#'
#' The purpose is to move from an ensemble of trees to a single tree only.
-#' It takes advantage of the fact that the shape of a binary tree is only defined by its deepness.
+#' It takes advantage of the fact that the shape of a binary tree is defined only by
+#' its deepness.
#' Therefore, in a boosting model, all trees have the same shape.
#' Moreover, the trees tend to reuse the same features.
#'
-#' The function will project each trees on one tree, and keep the \code{features.keep} first feature for each position.
+#' The function projects each tree onto a single one, and keeps for each position
+#' the \code{features.keep} first features (ranked by Gain).
#'
#' This function is inspired by this blog post:
#' \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
#'
@@ -99,3 +103,9 @@ xgb.plot.multi.trees <- function(model, names, features.keep = 5, plot.width = N
  DiagrammeR::render_graph(graph, width = plot.width, height = plot.height)
}
+globalVariables(
+  c(
+    "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
+  )
+)
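
The globalVariables() call added above works around a data.table idiom: column names such as Feature or Tree are used as bare symbols inside [.data.table, so R CMD check cannot find a binding for them and emits "no visible binding for global variable" NOTEs. utils::globalVariables() simply registers the names with the checker and has no runtime effect. A minimal self-contained sketch of the pattern, using two of the declared names:

library(data.table)

# Registered only to silence R CMD check; this changes nothing at runtime.
utils::globalVariables(c("Feature", "Tree"))

count_features <- function(dt) {
  # Feature and Tree are resolved as columns of dt by data.table's
  # non-standard evaluation, not as global variables.
  dt[, .N, by = list(Feature, Tree)]
}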

man/xgb.plot.multi.trees.Rd

@@ -0,0 +1,56 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.plot.multi.trees.R
\name{xgb.plot.multi.trees}
\alias{xgb.plot.multi.trees}
\title{Project all trees on one tree and plot it}
\usage{
xgb.plot.multi.trees(model, names, features.keep = 5, plot.width = NULL,
  plot.height = NULL)
}
\arguments{
\item{model}{model generated by the \code{xgb.train} function; passing it directly avoids the creation of a dump file.}
\item{features.keep}{number of features to keep in each position of the multi tree.}
\item{plot.width}{width in pixels of the graph to produce}
\item{plot.height}{height in pixels of the graph to produce}
\item{filename_dump}{the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).}
}
\value{
A \code{DiagrammeR} plot of the ensemble projected onto a single tree.
}
\description{
Visualization of the ensemble of trees as a single collective unit.
}
\details{
This function tries to capture the complexity of gradient boosted tree ensembles
in a cohesive way.
The goal is to improve the interpretability of a model generally seen as a black box.
The function is dedicated to boosting applied to decision trees only.

The purpose is to move from an ensemble of trees to a single tree only.
It takes advantage of the fact that the shape of a binary tree is defined only by
its deepness.
Therefore, in a boosting model, all trees have the same shape.
Moreover, the trees tend to reuse the same features.

The function projects each tree onto a single one, and keeps for each position
the \code{features.keep} first features (ranked by Gain).

This function is inspired by this blog post:
\url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
}
\examples{
data(agaricus.train, package='xgboost')
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 15,
eta = 1, nthread = 2, nround = 30, objective = "binary:logistic",
min_child_weight = 50)
p <- xgb.plot.multi.trees(bst, agaricus.train$data@Dimnames[[2]], 3)
print(p)
}
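
For intuition only, the projection described in the details above can be sketched with data.table; this is a hypothetical simplification, not the actual implementation of xgb.plot.multi.trees. It assumes a tree dump dt with the columns Feature, Gain and abs.node.position (one of the globals declared earlier), where abs.node.position identifies the same slot across all trees:

library(data.table)

# Hypothetical sketch: collapse an ensemble onto a single tree shape.
project_trees <- function(dt, features.keep = 5) {
  # Accumulate each feature's Gain at each node position across all trees...
  agg <- dt[, list(Gain = sum(Gain)), by = list(abs.node.position, Feature)]
  # ...then keep the features.keep highest-Gain features per position.
  agg[order(-Gain), head(.SD, features.keep), by = abs.node.position]
}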

man/xgb.plot.tree.Rd

@@ -5,7 +5,7 @@
\title{Plot a boosted tree model}
\usage{
xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
-  n_first_tree = NULL, width = NULL, height = NULL)
+  n_first_tree = NULL, plot.width = NULL, plot.height = NULL)
}
\arguments{
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
@@ -16,9 +16,9 @@ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
\item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.}
-\item{width}{the width of the diagram in pixels.}
+\item{plot.width}{the width of the diagram in pixels.}
-\item{height}{the height of the diagram in pixels.}
+\item{plot.height}{the height of the diagram in pixels.}
}
\value{
A \code{DiagrammeR} plot of the model.
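
With the rename in place, xgb.plot.tree takes the same plot.width/plot.height arguments as xgb.plot.multi.trees. A short usage sketch, reusing bst and the feature names from the example above (the argument values are arbitrary, for illustration only):

xgb.plot.tree(feature_names = agaricus.train$data@Dimnames[[2]],
              model = bst, n_first_tree = 2,
              plot.width = 600, plot.height = 400)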