From 94d070da601379a41d32e10ddb37c9c8e1e42bdc Mon Sep 17 00:00:00 2001 From: El Potaeto Date: Tue, 6 Jan 2015 13:59:29 +0100 Subject: [PATCH] add limit number of trees option --- R-package/R/xgb.plot.tree.R | 12 +++++++++--- R-package/man/xgb.plot.tree.Rd | 5 ++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R index 7eb267298..4863fd7ca 100644 --- a/R-package/R/xgb.plot.tree.R +++ b/R-package/R/xgb.plot.tree.R @@ -17,6 +17,7 @@ #' @importFrom DiagrammeR DiagrammeR #' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}. #' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (\code{with.stats = T} in function \code{xgb.dump}). +#' @param n_first_tree limit the plot to the n first trees. #' #' @return A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model. #' @@ -41,25 +42,30 @@ #' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], 'xgb.model.dump') #' #' @export -xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL){ +xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, n_first_tree = NULL){ if (!class(feature_names) %in% c("character", "NULL")) { stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.") } - if (class(filename_dump) != "character" & file.exists(filename_dump)) { + if (class(filename_dump) != "character" || !file.exists(filename_dump)) { stop("filename_dump: Has to be a path to the model dump file.") } + if (!class(n_first_tree) %in% c("numeric", "NULL") | length(n_first_tree) > 1) { + stop("n_first_tree: Has to be a numeric vector of size 1.") + } text <- readLines(filename_dump) %>% str_trim(side = "both") position <- str_match(text, "booster") %>% is.na %>% not %>% which %>% c(length(text)+1) extract <- function(x, pattern) str_extract(x, pattern) %>% str_split("=") %>% lapply(function(x) x[2] %>% as.numeric) %>% unlist + n_round <- min(length(position) - 1, n_first_tree) + addTreeId <- function(x, i) paste(i,x,sep = "-") allTrees <- data.table() - for(i in 1:(length(position)-1)){ + for(i in 1:n_round){ tree <- text[(position[i]+1):(position[i+1]-1)] diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd index 08f8b9c94..eeec2f111 100644 --- a/R-package/man/xgb.plot.tree.Rd +++ b/R-package/man/xgb.plot.tree.Rd @@ -4,12 +4,15 @@ \alias{xgb.plot.tree} \title{Plot a boosted tree model} \usage{ -xgb.plot.tree(feature_names = NULL, filename_dump = NULL) +xgb.plot.tree(feature_names = NULL, filename_dump = NULL, + n_first_tree = NULL) } \arguments{ \item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.} \item{filename_dump}{the path to the text file storing the model. Model dump must include the gain per feature and per tree (\code{with.stats = T} in function \code{xgb.dump}).} + +\item{n_first_tree}{limit the plot to the n first trees.} } \value{ A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model.