From 68b666d7e538b3baa6ea07ad8501f16a3709c385 Mon Sep 17 00:00:00 2001 From: pommedeterresautee Date: Fri, 27 Nov 2015 17:58:50 +0100 Subject: [PATCH] add exclusion of global variables + generate Roxygen doc --- R-package/NAMESPACE | 1 + R-package/R/xgb.plot.multi.trees.R | 18 +++++++-- R-package/man/xgb.plot.multi.trees.Rd | 56 +++++++++++++++++++++++++++ R-package/man/xgb.plot.tree.Rd | 6 +-- 4 files changed, 74 insertions(+), 7 deletions(-) create mode 100644 R-package/man/xgb.plot.multi.trees.Rd diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 7f6fa5817..3a590f27a 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -12,6 +12,7 @@ export(xgb.load) export(xgb.model.dt.tree) export(xgb.plot.deepness) export(xgb.plot.importance) +export(xgb.plot.multi.trees) export(xgb.plot.tree) export(xgb.save) export(xgb.save.raw) diff --git a/R-package/R/xgb.plot.multi.trees.R b/R-package/R/xgb.plot.multi.trees.R index 2f0fb1d3f..d30d86332 100644 --- a/R-package/R/xgb.plot.multi.trees.R +++ b/R-package/R/xgb.plot.multi.trees.R @@ -1,6 +1,6 @@ #' Project all trees on one tree and plot it #' -#' visualization to view the ensemble of trees as a single collective unit. +#' Visualization of the ensemble of trees as a single collective unit. #' #' @importFrom data.table data.table #' @importFrom data.table rbindlist @@ -18,16 +18,20 @@ #' #' @details #' -#' This function tries to capture the complexity of gradient boosted tree ensembles in a cohesive way. +#' This function tries to capture the complexity of gradient boosted tree ensembles +#' in a cohesive way. #' The goal is to improve the interpretability of the model generally seen as black box. #' The function is dedicated to boosting applied to decision trees only. #' #' The purpose is to move from an ensemble of trees to a single tree only. -#' It takes advantage of the fact that the shape of a binary tree is only defined by its deepness. +#' It takes advantage of the fact that the shape of a binary tree is only defined by +#' its deepness. #' Therefore in a boosting model, all trees have the same shape. #' Moreover, the trees tend to reuse the same features. #' -#' The function will project each trees on one tree, and keep the \code{features.keep} first feature for each position. +#' The function will project each trees on one, and keep for each position the +#' \code{features.keep} first features (based on Gain per feature). +#' #' This function is inspired from this blog post: #' \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/} #' @@ -99,3 +103,9 @@ xgb.plot.multi.trees <- function(model, names, features.keep = 5, plot.width = N DiagrammeR::render_graph(graph, width = plot.width, height = plot.height) } + +globalVariables( + c( + "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position" + ) +) \ No newline at end of file diff --git a/R-package/man/xgb.plot.multi.trees.Rd b/R-package/man/xgb.plot.multi.trees.Rd new file mode 100644 index 000000000..2bbe29ca5 --- /dev/null +++ b/R-package/man/xgb.plot.multi.trees.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.plot.multi.trees.R +\name{xgb.plot.multi.trees} +\alias{xgb.plot.multi.trees} +\title{Project all trees on one tree and plot it} +\usage{ +xgb.plot.multi.trees(model, names, features.keep = 5, plot.width = NULL, + plot.height = NULL) +} +\arguments{ +\item{model}{dump generated by the \code{xgb.train} function. Avoid the creation of a dump file.} + +\item{features.keep}{number of features to keep in each position of the multi tree.} + +\item{plot.width}{width in pixels of the graph to produce} + +\item{plot.height}{height in pixels of the graph to produce} + +\item{filename_dump}{the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).} +} +\value{ +Two graphs showing the distribution of the model deepness. +} +\description{ +Visualization of the ensemble of trees as a single collective unit. +} +\details{ +This function tries to capture the complexity of gradient boosted tree ensembles +in a cohesive way. +The goal is to improve the interpretability of the model generally seen as black box. +The function is dedicated to boosting applied to decision trees only. + +The purpose is to move from an ensemble of trees to a single tree only. +It takes advantage of the fact that the shape of a binary tree is only defined by +its deepness. +Therefore in a boosting model, all trees have the same shape. +Moreover, the trees tend to reuse the same features. + +The function will project each trees on one, and keep for each position the +\code{features.keep} first features (based on Gain per feature). + +This function is inspired from this blog post: +\url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/} +} +\examples{ +data(agaricus.train, package='xgboost') + +bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 15, + eta = 1, nthread = 2, nround = 30, objective = "binary:logistic", + min_child_weight = 50) + +p <- xgb.plot.multi.trees(bst, agaricus.train$data@Dimnames[[2]], 3) +print(p) + +} + diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd index f34e75bf9..2008014cf 100644 --- a/R-package/man/xgb.plot.tree.Rd +++ b/R-package/man/xgb.plot.tree.Rd @@ -5,7 +5,7 @@ \title{Plot a boosted tree model} \usage{ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL, - n_first_tree = NULL, width = NULL, height = NULL) + n_first_tree = NULL, plot.width = NULL, plot.height = NULL) } \arguments{ \item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.} @@ -16,9 +16,9 @@ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL, \item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.} -\item{width}{the width of the diagram in pixels.} +\item{plot.width}{the width of the diagram in pixels.} -\item{height}{the height of the diagram in pixels.} +\item{plot.height}{the height of the diagram in pixels.} } \value{ A \code{DiagrammeR} of the model.