add exclusion of global variables + generate Roxygen doc

pommedeterresautee 2015-11-27 17:58:50 +01:00
parent 3d50a6a425
commit 68b666d7e5
4 changed files with 74 additions and 7 deletions

NAMESPACE

@@ -12,6 +12,7 @@ export(xgb.load)
export(xgb.model.dt.tree)
export(xgb.plot.deepness)
export(xgb.plot.importance)
+export(xgb.plot.multi.trees)
export(xgb.plot.tree)
export(xgb.save)
export(xgb.save.raw)
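
The added export is generated rather than hand-written: roxygen2 derives NAMESPACE directives from @export tags in the R sources. A minimal sketch of the pattern behind this hunk (the signature matches the usage section further down; the body is elided, and devtools::document() is just one common way to run roxygen2):

#' Project all trees on one tree and plot it
#'
#' @export
xgb.plot.multi.trees <- function(model, names, features.keep = 5,
                                 plot.width = NULL, plot.height = NULL) {
  # ... build the multi-tree graph and render it with DiagrammeR ...
}

Re-running roxygen2, e.g. via devtools::document(), regenerates both NAMESPACE and the man/*.Rd files shown below.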

R/xgb.plot.multi.trees.R

@@ -1,6 +1,6 @@
#' Project all trees on one tree and plot it
#'
-#' visualization to view the ensemble of trees as a single collective unit.
+#' Visualization of the ensemble of trees as a single collective unit.
#'
#' @importFrom data.table data.table
#' @importFrom data.table rbindlist
@@ -18,16 +18,20 @@
#'
#' @details
#'
-#' This function tries to capture the complexity of gradient boosted tree ensembles in a cohesive way.
+#' This function tries to capture the complexity of gradient boosted tree ensembles
+#' in a cohesive way.
#' The goal is to improve the interpretability of a model generally seen as a black box.
#' The function is dedicated to boosting applied to decision trees only.
#'
#' The purpose is to move from an ensemble of trees to a single tree only.
-#' It takes advantage of the fact that the shape of a binary tree is only defined by its deepness.
+#' It takes advantage of the fact that the shape of a binary tree is defined only by
+#' its deepness.
#' Therefore, in a boosting model, all trees have the same shape.
#' Moreover, the trees tend to reuse the same features.
#'
-#' The function will project each trees on one tree, and keep the \code{features.keep} first feature for each position.
+#' The function projects each tree onto a single one, and keeps for each position
+#' the \code{features.keep} first features (ranked by Gain).
#'
#' This function is inspired by this blog post:
#' \url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
#'
@@ -99,3 +103,9 @@ xgb.plot.multi.trees <- function(model, names, features.keep = 5, plot.width = N
  DiagrammeR::render_graph(graph, width = plot.width, height = plot.height)
}
+globalVariables(
+  c(
+    "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
+  )
+)
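
The globalVariables() call added above works around a data.table idiom: column names such as Feature or Tree are used as bare symbols inside [.data.table, so R CMD check cannot find a binding for them and emits "no visible binding for global variable" NOTEs. utils::globalVariables() simply registers the names with the checker and has no runtime effect. A minimal self-contained sketch of the pattern, using two of the declared names:

library(data.table)

# Registered only to silence R CMD check; this changes nothing at runtime.
utils::globalVariables(c("Feature", "Tree"))

count_features <- function(dt) {
  # Feature and Tree are resolved as columns of dt by data.table's
  # non-standard evaluation, not as global variables.
  dt[, .N, by = list(Feature, Tree)]
}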

man/xgb.plot.multi.trees.Rd

@@ -0,0 +1,56 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.plot.multi.trees.R
\name{xgb.plot.multi.trees}
\alias{xgb.plot.multi.trees}
\title{Project all trees on one tree and plot it}
\usage{
xgb.plot.multi.trees(model, names, features.keep = 5, plot.width = NULL,
  plot.height = NULL)
}
\arguments{
\item{model}{model generated by the \code{xgb.train} function; passing it directly avoids the creation of a dump file.}
\item{features.keep}{number of features to keep in each position of the multi tree.}
\item{plot.width}{width in pixels of the graph to produce}
\item{plot.height}{height in pixels of the graph to produce}
\item{filename_dump}{the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).}
}
\value{
A \code{DiagrammeR} plot of the ensemble projected onto a single tree.
}
\description{
Visualization of the ensemble of trees as a single collective unit.
}
\details{
This function tries to capture the complexity of gradient boosted tree ensembles
in a cohesive way.
The goal is to improve the interpretability of a model generally seen as a black box.
The function is dedicated to boosting applied to decision trees only.

The purpose is to move from an ensemble of trees to a single tree only.
It takes advantage of the fact that the shape of a binary tree is defined only by
its deepness.
Therefore, in a boosting model, all trees have the same shape.
Moreover, the trees tend to reuse the same features.

The function projects each tree onto a single one, and keeps for each position
the \code{features.keep} first features (ranked by Gain).

This function is inspired by this blog post:
\url{https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/}
}
\examples{
data(agaricus.train, package='xgboost')
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 15,
eta = 1, nthread = 2, nround = 30, objective = "binary:logistic",
min_child_weight = 50)
p <- xgb.plot.multi.trees(bst, agaricus.train$data@Dimnames[[2]], 3)
print(p)
}
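
For intuition only, the projection described in the details above can be sketched with data.table; this is a hypothetical simplification, not the actual implementation of xgb.plot.multi.trees. It assumes a tree dump dt with the columns Feature, Gain and abs.node.position (one of the globals declared earlier), where abs.node.position identifies the same slot across all trees:

library(data.table)

# Hypothetical sketch: collapse an ensemble onto a single tree shape.
project_trees <- function(dt, features.keep = 5) {
  # Accumulate each feature's Gain at each node position across all trees...
  agg <- dt[, list(Gain = sum(Gain)), by = list(abs.node.position, Feature)]
  # ...then keep the features.keep highest-Gain features per position.
  agg[order(-Gain), head(.SD, features.keep), by = abs.node.position]
}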

man/xgb.plot.tree.Rd

@@ -5,7 +5,7 @@
\title{Plot a boosted tree model}
\usage{
xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
-  n_first_tree = NULL, width = NULL, height = NULL)
+  n_first_tree = NULL, plot.width = NULL, plot.height = NULL)
}
\arguments{
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
@@ -16,9 +16,9 @@ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
\item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.}
-\item{width}{the width of the diagram in pixels.}
+\item{plot.width}{the width of the diagram in pixels.}
-\item{height}{the height of the diagram in pixels.}
+\item{plot.height}{the height of the diagram in pixels.}
}
\value{
A \code{DiagrammeR} plot of the model.
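
With the rename in place, xgb.plot.tree takes the same plot.width/plot.height arguments as xgb.plot.multi.trees. A short usage sketch, reusing bst and the feature names from the example above (the argument values are arbitrary, for illustration only):

xgb.plot.tree(feature_names = agaricus.train$data@Dimnames[[2]],
              model = bst, n_first_tree = 2,
              plot.width = 600, plot.height = 400)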