New documentation rewording
commit b2e68b8dc7 (parent f761432c11)
@@ -69,7 +69,7 @@ get.paths.to.leaf <- function(dt.tree) {
 #' @importFrom data.table setnames
 #' @importFrom data.table :=
 #' @importFrom magrittr %>%
-#' @param model dump generated by the \code{xgb.train} function. Avoid the creation of a dump file.
+#' @param model dump generated by the \code{xgb.train} function.
 #'
 #' @return Two graphs showing the distribution of the model deepness.
 #'
@@ -86,7 +86,7 @@ get.paths.to.leaf <- function(dt.tree) {
 #'
 #' \itemize{
 #' \item Count: number of leaf per level of deepness;
-#' \item Weighted cover: noramlized weighted cover per Leaf (weighted number of instances).
+#' \item Weighted cover: normalized weighted cover per leaf (weighted number of instances).
 #' }
 #'
 #' This function is inspired by the blog post \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}
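As a reading aid for the two xgb.plot.deepness hunks above: a minimal sketch of the documented call, assuming the agaricus.train dataset and the same training parameters as the xgb.plot.tree example later in this diff (the snippet is illustrative, not part of the commit).

library(xgboost)
data(agaricus.train, package = 'xgboost')

# Train a small boosted tree model; parameters mirror the package's
# own xgb.plot.tree example further down in this diff.
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max.depth = 2, eta = 1, nthread = 2, nround = 2,
               objective = "binary:logistic")

# Per the reworded @param text, the model object is passed directly;
# the function returns two graphs of the deepness distribution.
xgb.plot.deepness(model = bst)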
@@ -10,8 +10,8 @@
 #' @importFrom stringr str_detect
 #' @importFrom stringr str_extract
 #'
-#' @param model dump generated by the \code{xgb.train} function. Avoid the creation of a dump file.
-#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
+#' @param model dump generated by the \code{xgb.train} function.
+#' @param feature_names names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
 #' @param features.keep number of features to keep in each position of the multi trees.
 #' @param plot.width width in pixels of the graph to produce
 #' @param plot.height height in pixels of the graph to produce
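And a companion sketch for the xgb.plot.multi.trees parameters documented in the hunk above, reusing the assumed bst model and feature names from the previous snippet; plot.width and plot.height keep their NULL defaults.

# features.keep bounds how many features are shown per position of
# the multi trees; 5 is the default shown in the .Rd usage below.
xgb.plot.multi.trees(model = bst,
                     feature_names = agaricus.train$data@Dimnames[[2]],
                     features.keep = 5)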
@@ -1,12 +1,11 @@
 #' Plot a boosted tree model
 #'
-#' Read a tree model text dump.
-#' Plotting only works for boosted tree model (not linear model).
+#' Read a tree model text dump and plot the model.
 #'
 #' @importFrom data.table data.table
 #' @importFrom data.table :=
 #' @importFrom magrittr %>%
-#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
+#' @param feature_names names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
 #' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
 #' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.
 #' @param plot.width the width of the diagram in pixels.
@@ -19,25 +18,20 @@
 #' The content of each node is organised that way:
 #'
 #' \itemize{
-#' \item \code{feature} value ;
-#' \item \code{cover}: the sum of second order gradient of training data classified to the leaf, if it is square loss, this simply corresponds to the number of instances in that branch. Deeper in the tree a node is, lower this metric will be ;
+#' \item \code{feature} value;
+#' \item \code{cover}: the sum of second order gradient of training data classified to the leaf, if it is square loss, this simply corresponds to the number of instances in that branch. Deeper in the tree a node is, lower this metric will be;
 #' \item \code{gain}: metric the importance of the node in the model.
 #' }
 #'
-#' Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
-#' It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
+#' The function uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #'
-#' #Both dataset are list with two items, a sparse matrix and labels
-#' #(labels = outcome column which will be learned).
-#' #Each column of the sparse Matrix is a feature in one hot encoding format.
-#'
 #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2,
 #' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 #'
-#' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix.
+#' # agaricus.train$data@@Dimnames[[2]] represents the column names of the sparse matrix.
 #' xgb.plot.tree(feature_names = agaricus.train$data@@Dimnames[[2]], model = bst)
 #'
 #' @export
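The example kept in the hunk above never exercises n_first_tree; a short hedged sketch with the same assumed bst model (the value 1 is illustrative):

# Restrict the plot to the first tree; with the NULL default every
# tree of the model is plotted, which the docs warn can be slow.
xgb.plot.tree(feature_names = agaricus.train$data@Dimnames[[2]],
              model = bst, n_first_tree = 1)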
@@ -7,7 +7,7 @@
 xgb.plot.deepness(model = NULL)
 }
 \arguments{
-\item{model}{dump generated by the \code{xgb.train} function. Avoid the creation of a dump file.}
+\item{model}{dump generated by the \code{xgb.train} function.}
 }
 \value{
 Two graphs showing the distribution of the model deepness.
@@ -28,7 +28,7 @@ The graph is made of two parts:
 
 \itemize{
 \item Count: number of leaf per level of deepness;
-\item Weighted cover: noramlized weighted cover per Leaf (weighted number of instances).
+\item Weighted cover: normalized weighted cover per leaf (weighted number of instances).
 }
 
 This function is inspired by the blog post \url{http://aysent.github.io/2015/11/08/random-forest-leaf-visualization.html}
@@ -8,9 +8,9 @@ xgb.plot.multi.trees(model, feature_names = NULL, features.keep = 5,
 plot.width = NULL, plot.height = NULL)
 }
 \arguments{
-\item{model}{dump generated by the \code{xgb.train} function. Avoid the creation of a dump file.}
+\item{model}{dump generated by the \code{xgb.train} function.}
 
-\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
+\item{feature_names}{names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
 
 \item{features.keep}{number of features to keep in each position of the multi trees.}
 
@@ -8,7 +8,7 @@ xgb.plot.tree(feature_names = NULL, model = NULL, n_first_tree = NULL,
 plot.width = NULL, plot.height = NULL)
 }
 \arguments{
-\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
+\item{feature_names}{names of each feature as a \code{character} vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
 
 \item{model}{generated by the \code{xgb.train} function. Avoid the creation of a dump file.}
 
@@ -22,32 +22,26 @@ xgb.plot.tree(feature_names = NULL, model = NULL, n_first_tree = NULL,
 A \code{DiagrammeR} of the model.
 }
 \description{
-Read a tree model text dump.
-Plotting only works for boosted tree model (not linear model).
+Read a tree model text dump and plot the model.
 }
 \details{
 The content of each node is organised that way:
 
 \itemize{
-\item \code{feature} value ;
-\item \code{cover}: the sum of second order gradient of training data classified to the leaf, if it is square loss, this simply corresponds to the number of instances in that branch. Deeper in the tree a node is, lower this metric will be ;
+\item \code{feature} value;
+\item \code{cover}: the sum of second order gradient of training data classified to the leaf, if it is square loss, this simply corresponds to the number of instances in that branch. Deeper in the tree a node is, lower this metric will be;
 \item \code{gain}: metric the importance of the node in the model.
 }
 
-Each branch finishes with a leaf. For each leaf, only the \code{cover} is indicated.
-It uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
+The function uses \href{http://www.graphviz.org/}{GraphViz} library for that purpose.
 }
 \examples{
 data(agaricus.train, package='xgboost')
 
-#Both dataset are list with two items, a sparse matrix and labels
-#(labels = outcome column which will be learned).
-#Each column of the sparse Matrix is a feature in one hot encoding format.
-
 bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max.depth = 2,
 eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
 
-#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
+# agaricus.train$data@Dimnames[[2]] represents the column names of the sparse matrix.
 xgb.plot.tree(feature_names = agaricus.train$data@Dimnames[[2]], model = bst)
 
 }
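A side note on the \code{cover} wording repeated in the \details hunk above: for square loss (y - yhat)^2 / 2, the second-order gradient is 1 for every training instance, so summing it over the instances routed to a node simply counts them, which is why the docs can say cover "corresponds to the number of instances in that branch".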