Moved the external graphing packages to Suggests in order to trim the dependencies

This commit is contained in:
Vadim Khotilovich 2015-04-07 17:43:53 -05:00
parent e91bacd378
commit 76cef701ab
4 changed files with 21 additions and 34 deletions

View File

@ -18,7 +18,12 @@ License: Apache License (== 2.0) | file LICENSE
URL: https://github.com/dmlc/xgboost URL: https://github.com/dmlc/xgboost
BugReports: https://github.com/dmlc/xgboost/issues BugReports: https://github.com/dmlc/xgboost/issues
VignetteBuilder: knitr VignetteBuilder: knitr
Suggests: knitr Suggests:
knitr,
ggplot2 (>= 1.0.0),
DiagrammeR (>= 0.4),
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3)
Depends: Depends:
R (>= 2.10) R (>= 2.10)
Imports: Imports:
@ -26,8 +31,4 @@ Imports:
methods, methods,
data.table (>= 1.9.4), data.table (>= 1.9.4),
magrittr (>= 1.5), magrittr (>= 1.5),
stringr (>= 0.6.2), stringr (>= 0.6.2)
DiagrammeR (>= 0.4),
ggplot2 (>= 1.0.0),
Ckmeans.1d.dp (>= 3.3.1),
vcd (>= 1.3)

View File

@ -21,8 +21,6 @@ exportMethods(predict)
import(methods) import(methods)
importClassesFrom(Matrix,dgCMatrix) importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgeMatrix) importClassesFrom(Matrix,dgeMatrix)
importFrom(Ckmeans.1d.dp,Ckmeans.1d.dp)
importFrom(DiagrammeR,mermaid)
importFrom(Matrix,cBind) importFrom(Matrix,cBind)
importFrom(Matrix,colSums) importFrom(Matrix,colSums)
importFrom(Matrix,sparseVector) importFrom(Matrix,sparseVector)
@ -34,16 +32,6 @@ importFrom(data.table,fread)
importFrom(data.table,rbindlist) importFrom(data.table,rbindlist)
importFrom(data.table,set) importFrom(data.table,set)
importFrom(data.table,setnames) importFrom(data.table,setnames)
importFrom(ggplot2,aes)
importFrom(ggplot2,coord_flip)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_text)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,theme)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(magrittr,"%>%") importFrom(magrittr,"%>%")
importFrom(magrittr,add) importFrom(magrittr,add)
importFrom(magrittr,not) importFrom(magrittr,not)

View File

@ -2,17 +2,6 @@
#' #'
#' Read a data.table containing feature importance details and plot it. #' Read a data.table containing feature importance details and plot it.
#' #'
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 coord_flip
#' @importFrom ggplot2 xlab
#' @importFrom ggplot2 ylab
#' @importFrom ggplot2 ggtitle
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 element_blank
#' @importFrom Ckmeans.1d.dp Ckmeans.1d.dp
#' @importFrom magrittr %>% #' @importFrom magrittr %>%
#' @param importance_matrix a \code{data.table} returned by the \code{xgb.importance} function. #' @param importance_matrix a \code{data.table} returned by the \code{xgb.importance} function.
#' @param numberOfClusters a \code{numeric} vector containing the min and the max range of the possible number of clusters of bars. #' @param numberOfClusters a \code{numeric} vector containing the min and the max range of the possible number of clusters of bars.
@ -44,11 +33,17 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
if (!"data.table" %in% class(importance_matrix)) { if (!"data.table" %in% class(importance_matrix)) {
stop("importance_matrix: Should be a data.table.") stop("importance_matrix: Should be a data.table.")
} }
if (!require(ggplot2, quietly = TRUE)) {
stop("ggplot2 package is required for plotting the importance", call. = FALSE)
}
if (!requireNamespace("Ckmeans.1d.dp", quietly = TRUE)) {
stop("Ckmeans.1d.dp package is required for plotting the importance", call. = FALSE)
}
# To avoid issues in clustering when co-occurrences are used # To avoid issues in clustering when co-occurrences are used
importance_matrix <- importance_matrix[, .(Gain = sum(Gain)), by = Feature] importance_matrix <- importance_matrix[, .(Gain = sum(Gain)), by = Feature]
clusters <- suppressWarnings(Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters)) clusters <- suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
importance_matrix[,"Cluster":=clusters$cluster %>% as.character] importance_matrix[,"Cluster":=clusters$cluster %>% as.character]
plot <- ggplot(importance_matrix, aes(x=reorder(Feature, Gain), y = Gain, width= 0.05), environment = environment())+ geom_bar(aes(fill=Cluster), stat="identity", position="identity") + coord_flip() + xlab("Features") + ylab("Gain") + ggtitle("Feature importance") + theme(plot.title = element_text(lineheight=.9, face="bold"), panel.grid.major.y = element_blank() ) plot <- ggplot(importance_matrix, aes(x=reorder(Feature, Gain), y = Gain, width= 0.05), environment = environment())+ geom_bar(aes(fill=Cluster), stat="identity", position="identity") + coord_flip() + xlab("Features") + ylab("Gain") + ggtitle("Feature importance") + theme(plot.title = element_text(lineheight=.9, face="bold"), panel.grid.major.y = element_blank() )

View File

@ -15,7 +15,6 @@
#' @importFrom stringr str_split #' @importFrom stringr str_split
#' @importFrom stringr str_extract #' @importFrom stringr str_extract
#' @importFrom stringr str_trim #' @importFrom stringr str_trim
#' @importFrom DiagrammeR mermaid
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}. #' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument). #' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument).
#' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file. #' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
@ -64,7 +63,11 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
if (!class(model) %in% c("xgb.Booster", "NULL")) { if (!class(model) %in% c("xgb.Booster", "NULL")) {
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.") stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
} }
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
stop("DiagrammeR package is required for xgb.plot.tree", call. = FALSE)
}
if(is.null(model)){ if(is.null(model)){
allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree) allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree)
} else { } else {
@ -85,7 +88,7 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
no <- allTrees[Feature!="Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "") no <- allTrees[Feature!="Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";") path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
mermaid(path, width, height) DiagrammeR::mermaid(path, width, height)
} }
# Avoid error messages during CRAN check. # Avoid error messages during CRAN check.