diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R
index 7dd3a8ca3..174d92704 100644
--- a/R-package/R/xgb.importance.R
+++ b/R-package/R/xgb.importance.R
@@ -64,9 +64,9 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
}
if(text[2] == "bias:"){
- result <- linearDump(feature_names, text)
+ result <- readLines(filename_dump) %>% linearDump(feature_names, .)
} else {
- result <- treeDump(feature_names, text)
+ result <- treeDump(feature_names, text = text)
}
result
}
diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R
index 3e0723c61..5ad6c6b3d 100644
--- a/R-package/R/xgb.model.dt.tree.R
+++ b/R-package/R/xgb.model.dt.tree.R
@@ -16,6 +16,8 @@
#' @importFrom stringr str_trim
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).
+#' @param model an \code{xgb.Booster} model generated by the \code{xgb.train} function. Avoids the creation of a dump file.
+#' @param text dump generated by the \code{xgb.dump} function. Avoid the creation of a dump file. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).
#' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.
#'
#' @return A \code{data.table} of the features used in the model with their gain, cover and few other thing.
@@ -49,29 +51,37 @@
#' xgb.dump(bst, 'xgb.model.dump', with.stats = T)
#'
#' #agaricus.test$data@@Dimnames[[2]] represents the column names of the sparse matrix.
-#' xgb.model.dt.tree(agaricus.train$data@@Dimnames[[2]], 'xgb.model.dump')
+#' xgb.model.dt.tree(agaricus.train$data@@Dimnames[[2]], filename_dump = 'xgb.model.dump')
#'
#' @export
-xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, text = NULL, n_first_tree = NULL){
+xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, text = NULL, n_first_tree = NULL){
if (!class(feature_names) %in% c("character", "NULL")) {
stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
}
- if (!class(filename_dump) %in% c("character", "NULL")) {
- stop("filename_dump: Has to be a character vector representing the path to the model dump file.")
- } else if (class(filename_dump) == "character" && !file.exists(filename_dump)) {
+ if (!(class(filename_dump) %in% c("character", "NULL") && length(filename_dump) <= 1)) {
+ stop("filename_dump: Has to be a character vector of size 1 representing the path to the model dump file.")
+ } else if (!is.null(filename_dump) && !file.exists(filename_dump)) {
stop("filename_dump: path to the model doesn't exist.")
- } else if(is.null(filename_dump) & is.null(text)){
- stop("filename_dump: no path and no string version of the model dump have been provided.")
+ } else if(is.null(filename_dump) && is.null(model) && is.null(text)){
+ stop("filename_dump & model & text: no path to dump model, no model, no text dump, have been provided.")
}
- if (!class(text) %in% c("character", "NULL")) {
+
+ if (!class(model) %in% c("xgb.Booster", "NULL")) {
+ stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
+ }
+
+ if (!class(text) %in% c("character", "NULL")) {
stop("text: Has to be a vector of character or NULL if a path to the model dump has already been provided.")
}
+
if (!class(n_first_tree) %in% c("numeric", "NULL") | length(n_first_tree) > 1) {
stop("n_first_tree: Has to be a numeric vector of size 1.")
}
- if(is.null(text)){
+ if(!is.null(model)){
+ text = xgb.dump(model = model, with.stats = T)
+ } else if(!is.null(filename_dump)){
text <- readLines(filename_dump) %>% str_trim(side = "both")
}
diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R
index 7fb23c88a..01261fab3 100644
--- a/R-package/R/xgb.plot.tree.R
+++ b/R-package/R/xgb.plot.tree.R
@@ -20,7 +20,9 @@
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}). Possible to provide a model directly (see \code{model} argument).
#' @param model generated by the \code{xgb.train} function. Avoid the creation of a dump file.
#' @param n_first_tree limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.
-#' @param style a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.
+#' @param CSSstyle a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.
+#' @param width the width of the diagram in pixels.
+#' @param height the height of the diagram in pixels.
#'
#' @return A \code{DiagrammeR} of the model.
#'
@@ -52,9 +54,9 @@
#'
#' @export
#'
-xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, styles = NULL){
+xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NULL, n_first_tree = NULL, CSSstyle = NULL, width = NULL, height = NULL){
- if (!(class(styles) %in% c("character", "NULL") && length(styles) == 1)) {
+ if (!(class(CSSstyle) %in% c("character", "NULL") && length(CSSstyle) <= 1)) {
stop("style: Has to be a character vector of size 1.")
}
@@ -65,8 +67,7 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
if(is.null(model)){
allTrees <- xgb.model.dt.tree(feature_names = feature_names, filename_dump = filename_dump, n_first_tree = n_first_tree)
} else {
- text = xgb.dump(model = model, with.stats = T)
- allTrees <- xgb.model.dt.tree(feature_names = feature_names, text = text, n_first_tree = n_first_tree)
+ allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
}
allTrees[Feature!="Leaf" ,yesPath:= paste(ID,"(", Feature, "
Cover: ", Cover, "
Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
@@ -74,14 +75,14 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
allTrees[Feature!="Leaf" ,noPath:= paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
- if(is.null(styles)){
- styles <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
+ if(is.null(CSSstyle)){
+ CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
}
yes <- allTrees[Feature!="Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
no <- allTrees[Feature!="Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
- path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(styles, yes, no, sep = ";")
- DiagrammeR(path)
+ path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
+ DiagrammeR(path, width, height)
}
diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd
index 2bc48c4d0..fb5bd94bd 100644
--- a/R-package/man/xgb.model.dt.tree.Rd
+++ b/R-package/man/xgb.model.dt.tree.Rd
@@ -4,14 +4,18 @@
\alias{xgb.model.dt.tree}
\title{Convert tree model dump to data.table}
\usage{
-xgb.model.dt.tree(feature_names = NULL, filename_dump = NULL, text = NULL,
- n_first_tree = NULL)
+xgb.model.dt.tree(feature_names = NULL, filename_dump = NULL,
+ model = NULL, text = NULL, n_first_tree = NULL)
}
\arguments{
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
\item{filename_dump}{the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).}
+\item{model}{an \code{xgb.Booster} model generated by the \code{xgb.train} function. Avoids the creation of a dump file.}
+
+\item{text}{dump generated by the \code{xgb.dump} function. Avoid the creation of a dump file. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).}
+
\item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.}
}
\value{
@@ -49,6 +53,6 @@ bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
xgb.dump(bst, 'xgb.model.dump', with.stats = T)
#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
-xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], 'xgb.model.dump')
+xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], filename_dump = 'xgb.model.dump')
}
diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd
index c1b8418cd..ce69d4431 100644
--- a/R-package/man/xgb.plot.tree.Rd
+++ b/R-package/man/xgb.plot.tree.Rd
@@ -5,7 +5,7 @@
\title{Plot a boosted tree model}
\usage{
xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
- n_first_tree = NULL, styles = NULL)
+ n_first_tree = NULL, CSSstyle = NULL)
}
\arguments{
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
@@ -16,7 +16,7 @@ xgb.plot.tree(feature_names = NULL, filename_dump = NULL, model = NULL,
\item{n_first_tree}{limit the plot to the n first trees. If \code{NULL}, all trees of the model are plotted. Performance can be low for huge models.}
-\item{style}{a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.}
+\item{CSSstyle}{a \code{character} vector storing a css style to customize the appearance of nodes. Look at the \href{https://github.com/knsv/mermaid/wiki}{Mermaid wiki} for more information.}
}
\value{
A \code{DiagrammeR} of the model.