diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R index 7dd3a8ca3..174d92704 100644 --- a/R-package/R/xgb.importance.R +++ b/R-package/R/xgb.importance.R @@ -64,9 +64,9 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N } if(text[2] == "bias:"){ - result <- linearDump(feature_names, text) + result <- readLines(filename_dump) %>% linearDump(feature_names, .) } else { - result <- treeDump(feature_names, text) + result <- treeDump(feature_names, text = text) } result } diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R index 3e0723c61..5ad6c6b3d 100644 --- a/R-package/R/xgb.model.dt.tree.R +++ b/R-package/R/xgb.model.dt.tree.R @@ -16,6 +16,8 @@ #' @importFrom stringr str_trim #' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}. #' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). +#' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file. +#' @param text dump generated by the \code{xgb.dump} function. Avoid the creation of a dump file. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). #' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models. #' #' @return A \code{data.table} of the features used in the model with their gain, cover and few other thing. @@ -49,29 +51,37 @@ #' xgb.dump(bst, 'xgb.model.dump', with.stats = T) #' #' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix. 
-#' xgb.model.dt.tree(agaricus.train$data@@Dimnames[[2]], 'xgb.model.dump') +#' xgb.model.dt.tree(agaricus.train$data@@Dimnames[[2]], filename_dump = 'xgb.model.dump') #' #' @export -xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, text = NULL, n_first_tree = NULL){ +xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, text = NULL, n_first_tree = NULL){ if (!class(feature_names) %in% c("character", "NULL")) { stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.") } - if (!class(filename_dump) %in% c("character", "NULL")) { - stop("filename_dump: Has to be a character vector representing the path to the model dump file.") - } else if (class(filename_dump) == "character" && !file.exists(filename_dump)) { + if (!(class(filename_dump) %in% c("character", "NULL") && length(filename_dump) <= 1)) { + stop("filename_dump: Has to be a character vector of size 1 representing the path to the model dump file.") + } else if (!is.null(filename_dump) && !file.exists(filename_dump)) { stop("filename_dump: path to the model doesn't exist.") - } else if(is.null(filename_dump) & is.null(text)){ - stop("filename_dump: no path and no string version of the model dump have been provided.") + } else if(is.null(filename_dump) && is.null(model) && is.null(text)){ + stop("filename_dump & model & text: no path to dump model, no model, no text dump, have been provided.") } - if (!class(text) %in% c("character", "NULL")) { + + if (!class(model) %in% c("xgb.Booster", "NULL")) { + stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.") + } + + if (!class(text) %in% c("character", "NULL")) { stop("text: Has to be a vector of character or NULL if a path to the model dump has already been provided.") } + if (!class(n_first_tree) %in% c("numeric", "NULL") | 
length(n_first_tree) > 1) { stop("n_first_tree: Has to be a numeric vector of size 1.") } - if(is.null(text)){ + if(!is.null(model)){ + text = xgb.dump(model = model, with.stats = T) + } else if(!is.null(filename_dump)){ text <- readLines(filename_dump) %>% str_trim(side = "both") } diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R index 7fb23c88a..01261fab3 100644 --- a/R-package/R/xgb.plot.tree.R +++ b/R-package/R/xgb.plot.tree.R @@ -20,7 +20,9 @@ #' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument). #' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file. #' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models. -#' @param style a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information. +#' @param CSSstyle a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information. +#' @param width the width of the diagram in pixels. +#' @param height the height of the diagram in pixels. #' #' @return A \code{DiagrammeR} of the model. 
#' @@ -52,9 +54,9 @@ #' #' @export #' -xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, styles = NULL){ +xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){ - if (!(class(styles) %in% c("character", "NULL") && length(styles) == 1)) { + if (!(class(CSSstyle) %in% c("character", "NULL") && length(CSSstyle) <= 1)) { stop("style: Has to be a character vector of size 1.") } @@ -65,8 +67,7 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU if(is.null(model)){ allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree) } else { - text = xgb.dump(model = model, with.stats = T) - allTrees <- xgb.model.dt.tree(feature_names = feature_names, text = text, n_first_tree = n_first_tree) + allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree) } allTrees[Feature!="Leaf" ,yesPath:= paste(ID,"(", Feature, "
Cover: ", Cover, "
Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")] @@ -74,14 +75,14 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU allTrees[Feature!="Leaf" ,noPath:= paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")] - if(is.null(styles)){ - styles <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px" + if(is.null(CSSstyle)){ + CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px" } yes <- allTrees[Feature!="Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "") no <- allTrees[Feature!="Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "") - path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(styles, yes, no, sep = ";") - DiagrammeR(path) + path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";") + DiagrammeR(path, width, height) } diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd index 2bc48c4d0..fb5bd94bd 100644 --- a/R-package/man/xgb.model.dt.tree.Rd +++ b/R-package/man/xgb.model.dt.tree.Rd @@ -4,14 +4,18 @@ \alias{xgb.model.dt.tree} \title{Convert tree model dump to data.table} \usage{ -xgb.model.dt.tree(feature_names = NULL, filename_dump = NULL, text = NULL, - n_first_tree = NULL) +xgb.model.dt.tree(feature_names = NULL, filename_dump = NULL, + model = NULL, text = NULL, n_first_tree = NULL) } \arguments{ \item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). 
If model dump already contains feature names, this argument should be \code{NULL}.} \item{filename_dump}{the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).} +\item{model}{generated by the \code{xgb.train} function. Avoid the creation of a dump file.} + +\item{text}{dump generated by the \code{xgb.dump} function. Avoid the creation of a dump file. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).} + \item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.} } \value{ @@ -49,6 +53,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2, xgb.dump(bst, 'xgb.model.dump', with.stats = T) #agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix. -xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], 'xgb.model.dump') +xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], filename_dump = 'xgb.model.dump') } diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd index c1b8418cd..ce69d4431 100644 --- a/R-package/man/xgb.plot.tree.Rd +++ b/R-package/man/xgb.plot.tree.Rd @@ -5,7 +5,7 @@ \title{Plot a boosted tree model} \usage{ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL, - n_first_tree = NULL, styles = NULL) + n_first_tree = NULL, CSSstyle = NULL) } \arguments{ \item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.} @@ -16,7 +16,7 @@ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL, \item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. 
Performance can be low for huge models.} -\item{style}{a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.} +\item{CSSstyle}{a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.} } \value{ A \code{DiagrammeR} of the model.