Add an option to limit the number of trees
This commit is contained in:
parent
a6c588f90d
commit
94d070da60
@ -17,6 +17,7 @@
|
|||||||
#' @importFrom DiagrammeR DiagrammeR
|
#' @importFrom DiagrammeR DiagrammeR
|
||||||
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
|
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
|
||||||
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (\code{with.stats = T} in function \code{xgb.dump}).
|
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (\code{with.stats = T} in function \code{xgb.dump}).
|
||||||
|
#' @param n_first_tree limit the plot to the first n trees. If \code{NULL} (the default), all trees in the model dump are included.
|
||||||
#'
|
#'
|
||||||
#' @return A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model.
|
#' @return A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model.
|
||||||
#'
|
#'
|
||||||
@ -41,25 +42,30 @@
|
|||||||
#' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], 'xgb.model.dump')
|
#' xgb.plot.tree(agaricus.train$data@@Dimnames[[2]], 'xgb.model.dump')
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL){
|
xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, n_first_tree = NULL){
|
||||||
|
|
||||||
if (!class(feature_names) %in% c("character", "NULL")) {
|
if (!class(feature_names) %in% c("character", "NULL")) {
|
||||||
stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
|
stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
|
||||||
}
|
}
|
||||||
if (class(filename_dump) != "character" & file.exists(filename_dump)) {
|
if (class(filename_dump) != "character" || !file.exists(filename_dump)) {
|
||||||
stop("filename_dump: Has to be a path to the model dump file.")
|
stop("filename_dump: Has to be a path to the model dump file.")
|
||||||
}
|
}
|
||||||
|
if (!class(n_first_tree) %in% c("numeric", "NULL") | length(n_first_tree) > 1) {
|
||||||
|
stop("n_first_tree: Has to be a numeric vector of size 1.")
|
||||||
|
}
|
||||||
|
|
||||||
text <- readLines(filename_dump) %>% str_trim(side = "both")
|
text <- readLines(filename_dump) %>% str_trim(side = "both")
|
||||||
position <- str_match(text, "booster") %>% is.na %>% not %>% which %>% c(length(text)+1)
|
position <- str_match(text, "booster") %>% is.na %>% not %>% which %>% c(length(text)+1)
|
||||||
|
|
||||||
extract <- function(x, pattern) str_extract(x, pattern) %>% str_split("=") %>% lapply(function(x) x[2] %>% as.numeric) %>% unlist
|
extract <- function(x, pattern) str_extract(x, pattern) %>% str_split("=") %>% lapply(function(x) x[2] %>% as.numeric) %>% unlist
|
||||||
|
|
||||||
|
n_round <- min(length(position) - 1, n_first_tree)
|
||||||
|
|
||||||
addTreeId <- function(x, i) paste(i,x,sep = "-")
|
addTreeId <- function(x, i) paste(i,x,sep = "-")
|
||||||
|
|
||||||
allTrees <- data.table()
|
allTrees <- data.table()
|
||||||
|
|
||||||
for(i in 1:(length(position)-1)){
|
for(i in 1:n_round){
|
||||||
|
|
||||||
tree <- text[(position[i]+1):(position[i+1]-1)]
|
tree <- text[(position[i]+1):(position[i+1]-1)]
|
||||||
|
|
||||||
|
|||||||
@ -4,12 +4,15 @@
|
|||||||
\alias{xgb.plot.tree}
|
\alias{xgb.plot.tree}
|
||||||
\title{Plot a boosted tree model}
|
\title{Plot a boosted tree model}
|
||||||
\usage{
|
\usage{
|
||||||
xgb.plot.tree(feature_names = NULL, filename_dump = NULL)
|
xgb.plot.tree(feature_names = NULL, filename_dump = NULL,
|
||||||
|
n_first_tree = NULL)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
|
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.}
|
||||||
|
|
||||||
\item{filename_dump}{the path to the text file storing the model. Model dump must include the gain per feature and per tree (\code{with.stats = T} in function \code{xgb.dump}).}
|
\item{filename_dump}{the path to the text file storing the model. Model dump must include the gain per feature and per tree (\code{with.stats = T} in function \code{xgb.dump}).}
|
||||||
|
|
||||||
|
\item{n_first_tree}{limit the plot to the first n trees. If \code{NULL} (the default), all trees in the model dump are included.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model.
|
A \code{data.table} of the features used in the model with their average gain (and their weight for boosted tree model) in the model.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user