remove buggy feature
This commit is contained in:
parent
a82a942cd6
commit
85186a2e55
@ -73,7 +73,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
|
|||||||
}
|
}
|
||||||
|
|
||||||
treeDump <- function(feature_names, text){
|
treeDump <- function(feature_names, text){
|
||||||
result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[Feature!="Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N, Included = sum(Included)), by = Feature][,`:=`(Gain=Gain/sum(Gain),Cover=Cover/sum(Cover), Frequence = Frequence/sum(Frequence), Included = Included/Frequence)][,Gain:= ifelse(Included >= 0.5, Gain, -Gain)][order(-Gain)]
|
result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[Feature!="Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = Feature][,`:=`(Gain = Gain/sum(Gain), Cover = Cover/sum(Cover), Frequence = Frequence/sum(Frequence))]
|
||||||
|
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|||||||
@ -39,7 +39,6 @@
|
|||||||
#' \item \code{Cover}: metric to measure the number of observation affected by the split ;
|
#' \item \code{Cover}: metric to measure the number of observation affected by the split ;
|
||||||
#' \item \code{Tree}: ID of the tree. It is included in the main ID ;
|
#' \item \code{Tree}: ID of the tree. It is included in the main ID ;
|
||||||
#' \item \code{Yes.X} or \code{No.X}: data related to the pointer in \code{Yes} or \code{No} column ;
|
#' \item \code{Yes.X} or \code{No.X}: data related to the pointer in \code{Yes} or \code{No} column ;
|
||||||
#' \item \code{Included}: \code{boolean} value which indicates if this feature has been pointed by a Yes branch (\code{True}) or a No branch (\code{False}). By convention stem feature is always included ;
|
|
||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
@ -129,7 +128,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
|
|||||||
qualityLeaf <- extract(leaf, "leaf=\\-*\\d*\\.*\\d*")
|
qualityLeaf <- extract(leaf, "leaf=\\-*\\d*\\.*\\d*")
|
||||||
coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
|
coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
|
||||||
coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
|
coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
|
||||||
dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID][,"Included":=F][ID == yesBranch, Included:=T][1, Included:=T]
|
dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID]
|
||||||
|
|
||||||
allTrees <- rbindlist(list(allTrees, dt), use.names = T, fill = F)
|
allTrees <- rbindlist(list(allTrees, dt), use.names = T, fill = F)
|
||||||
}
|
}
|
||||||
@ -168,4 +167,4 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
|
|||||||
# Avoid error messages during CRAN check.
|
# Avoid error messages during CRAN check.
|
||||||
# The reason is that these variables are never declared
|
# The reason is that these variables are never declared
|
||||||
# They are mainly column names inferred by Data.table...
|
# They are mainly column names inferred by Data.table...
|
||||||
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence", "Included"))
|
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence"))
|
||||||
Loading…
x
Reference in New Issue
Block a user