new included feature in dt.tree function

2015-02-04 23:59:53 +01:00 · 2015-02-04 23:59:53 +01:00 · 9f5889f1e3
commit 9f5889f1e3
parent 451944c52b
3 changed files with 9 additions and 1 deletions
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@ -41,6 +41,7 @@ importFrom(ggplot2,ylab)
 importFrom(magrittr,"%>%")
 importFrom(magrittr,add)
 importFrom(magrittr,not)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
 importFrom(stringr,str_extract_all)
 importFrom(stringr,str_match)
--- a/R-package/R/xgb.model.dt.tree.R
+++ b/R-package/R/xgb.model.dt.tree.R
@ -14,6 +14,7 @@
 #' @importFrom stringr str_split
 #' @importFrom stringr str_extract
 #' @importFrom stringr str_trim
 #' @importFrom stringr str_detect
 #' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix (see example). If model dump already contains feature names, this argument should be \code{NULL}.
 #' @param filename_dump the path to the text file storing the model. Model dump must include the gain per feature and per tree (parameter \code{with.stats = TRUE} in function \code{xgb.dump}).
 #' @param model dump generated by the \code{xgb.train} function. Providing it avoids the creation of a dump file.
@ -37,6 +38,8 @@
 #'  \item \code{Quality}: it's the gain related to the split in this specific node ;
 #'  \item \code{Cover}: metric to measure the number of observations affected by the split ;
 #'  \item \code{Tree}: ID of the tree. It is included in the main ID ;
 #'  \item \code{Yes.X} or \code{No.X}: data related to the pointer in \code{Yes} or \code{No} column ;
 #'  \item \code{Included}: logical value which indicates whether this node is pointed to by a Yes branch (\code{TRUE}) or by a No branch (\code{FALSE}). Nodes whose \code{ID} ends in \code{-0} are also marked \code{TRUE} ;
 #' } 
 #'   
 #' @examples
@ -159,10 +162,12 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
      j = "No.Quality", 
      value = allTrees[ID == no,Quality])
  allTrees[,"Included":=F][ID == allTrees[!is.na(Yes), Yes], Included:=T][str_detect(ID, "-0$"), Included:=T]
  allTrees
 }
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared explicitly:
 # they are mainly column names resolved by data.table inside its own expressions.
-globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence"))
+globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence", "Included"))
--- a/R-package/man/xgb.model.dt.tree.Rd
+++ b/R-package/man/xgb.model.dt.tree.Rd
@ -39,6 +39,8 @@ The content of the \code{data.table} is organised that way:
 \item \code{Quality}: it's the gain related to the split in this specific node ;
 \item \code{Cover}: metric to measure the number of observations affected by the split ;
 \item \code{Tree}: ID of the tree. It is included in the main ID ;
 \item \code{Yes.X} or \code{No.X}: data related to the pointer in \code{Yes} or \code{No} column ;
 \item \code{Included}: logical value which indicates whether this node is pointed to by a Yes branch (\code{TRUE}) or by a No branch (\code{FALSE}). Nodes whose \code{ID} ends in \code{-0} are also marked \code{TRUE} ;
 }
 }
 \examples{