new included feature in dt.tree function
This commit is contained in:
parent
451944c52b
commit
9f5889f1e3
@ -41,6 +41,7 @@ importFrom(ggplot2,ylab)
|
|||||||
importFrom(magrittr,"%>%")
|
importFrom(magrittr,"%>%")
|
||||||
importFrom(magrittr,add)
|
importFrom(magrittr,add)
|
||||||
importFrom(magrittr,not)
|
importFrom(magrittr,not)
|
||||||
|
importFrom(stringr,str_detect)
|
||||||
importFrom(stringr,str_extract)
|
importFrom(stringr,str_extract)
|
||||||
importFrom(stringr,str_extract_all)
|
importFrom(stringr,str_extract_all)
|
||||||
importFrom(stringr,str_match)
|
importFrom(stringr,str_match)
|
||||||
|
|||||||
@ -14,6 +14,7 @@
|
|||||||
#' @importFrom stringr str_split
|
#' @importFrom stringr str_split
|
||||||
#' @importFrom stringr str_extract
|
#' @importFrom stringr str_extract
|
||||||
#' @importFrom stringr str_trim
|
#' @importFrom stringr str_trim
|
||||||
|
#' @importFrom stringr str_detect
|
||||||
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
|
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
|
||||||
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).
|
#' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = T} in function \code{xgb.dump}).
|
||||||
#' @param model dump generated by the \code{xgb.train} function. This avoids creating a dump file.
|
#' @param model dump generated by the \code{xgb.train} function. This avoids creating a dump file.
|
||||||
@ -37,6 +38,8 @@
|
|||||||
#' \item \code{Quality}: it's the gain related to the split in this specific node ;
|
#' \item \code{Quality}: it's the gain related to the split in this specific node ;
|
||||||
#' \item \code{Cover}: metric to measure the number of observations affected by the split ;
|
#' \item \code{Cover}: metric to measure the number of observations affected by the split ;
|
||||||
#' \item \code{Tree}: ID of the tree. It is included in the main ID ;
|
#' \item \code{Tree}: ID of the tree. It is included in the main ID ;
|
||||||
|
#' \item \code{Yes.X} or \code{No.X}: data related to the pointer in \code{Yes} or \code{No} column ;
|
||||||
|
#' \item \code{Included}: logical value which indicates whether this node is pointed to by a Yes branch (\code{TRUE}) or by a No branch (\code{FALSE}); root nodes are also flagged \code{TRUE} ;
|
||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
@ -159,10 +162,12 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
|
|||||||
j = "No.Quality",
|
j = "No.Quality",
|
||||||
value = allTrees[ID == no,Quality])
|
value = allTrees[ID == no,Quality])
|
||||||
|
|
||||||
|
allTrees[,"Included":=F][ID == allTrees[!is.na(Yes), Yes], Included:=T][str_detect(ID, "-0$"), Included:=T]
|
||||||
|
|
||||||
allTrees
|
allTrees
|
||||||
}
|
}
|
||||||
|
|
||||||
# Avoid error messages during CRAN check.
|
# Avoid error messages during CRAN check.
|
||||||
# The reason is that these variables are never declared
|
# The reason is that these variables are never declared
|
||||||
# They are mainly column names inferred by data.table...
|
# They are mainly column names inferred by data.table...
|
||||||
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence"))
|
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence", "Included"))
|
||||||
@ -39,6 +39,8 @@ The content of the \code{data.table} is organised that way:
|
|||||||
\item \code{Quality}: it's the gain related to the split in this specific node ;
|
\item \code{Quality}: it's the gain related to the split in this specific node ;
|
||||||
\item \code{Cover}: metric to measure the number of observations affected by the split ;
|
\item \code{Cover}: metric to measure the number of observations affected by the split ;
|
||||||
\item \code{Tree}: ID of the tree. It is included in the main ID ;
|
\item \code{Tree}: ID of the tree. It is included in the main ID ;
|
||||||
|
\item \code{Yes.X} or \code{No.X}: data related to the pointer in \code{Yes} or \code{No} column ;
|
||||||
|
\item \code{Included}: logical value which indicates whether this node is pointed to by a Yes branch (\code{TRUE}) or by a No branch (\code{FALSE}); root nodes are also flagged \code{TRUE} ;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user