remove buggy feature

This commit is contained in:
El Potaeto 2015-02-06 11:44:09 +01:00
parent a82a942cd6
commit 85186a2e55
2 changed files with 3 additions and 4 deletions

View File

@ -73,7 +73,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
}
treeDump <- function(feature_names, text){
result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[Feature!="Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N, Included = sum(Included)), by = Feature][,`:=`(Gain=Gain/sum(Gain),Cover=Cover/sum(Cover), Frequence = Frequence/sum(Frequence), Included = Included/Frequence)][,Gain:= ifelse(Included >= 0.5, Gain, -Gain)][order(-Gain)]
result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[Feature!="Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = Feature][,`:=`(Gain = Gain/sum(Gain), Cover = Cover/sum(Cover), Frequence = Frequence/sum(Frequence))]
result
}

View File

@ -39,7 +39,6 @@
#' \item \code{Cover}: metric to measure the number of observation affected by the split ;
#' \item \code{Tree}: ID of the tree. It is included in the main ID ;
#' \item \code{Yes.X} or \code{No.X}: data related to the pointer in \code{Yes} or \code{No} column ;
#' \item \code{Included}: \code{boolean} value which indicates if this feature has been pointed by a Yes branch (\code{True}) or a No branch (\code{False}). By convention stem feature is always included ;
#' }
#'
#' @examples
@ -129,7 +128,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
qualityLeaf <- extract(leaf, "leaf=\\-*\\d*\\.*\\d*")
coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID][,"Included":=F][ID == yesBranch, Included:=T][1, Included:=T]
dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID]
allTrees <- rbindlist(list(allTrees, dt), use.names = T, fill = F)
}
@ -168,4 +167,4 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
# Avoid error messages during CRAN check.
# The reason is that these variables are never declared
# They are mainly column names inferred by Data.table...
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence", "Included"))
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence"))