diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R index 51cf2a258..710143fa1 100644 --- a/R-package/R/xgb.importance.R +++ b/R-package/R/xgb.importance.R @@ -73,7 +73,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N } treeDump <- function(feature_names, text){ - result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[Feature!="Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N, Included = sum(Included)), by = Feature][,`:=`(Gain=Gain/sum(Gain),Cover=Cover/sum(Cover), Frequence = Frequence/sum(Frequence), Included = Included/Frequence)][,Gain:= ifelse(Included >= 0.5, Gain, -Gain)][order(-Gain)] + result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[Feature!="Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = Feature][,`:=`(Gain = Gain/sum(Gain), Cover = Cover/sum(Cover), Frequence = Frequence/sum(Frequence))] result } diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R index c88c16989..9c570c44b 100644 --- a/R-package/R/xgb.model.dt.tree.R +++ b/R-package/R/xgb.model.dt.tree.R @@ -39,7 +39,6 @@ #' \item \code{Cover}: metric to measure the number of observation affected by the split ; #' \item \code{Tree}: ID of the tree. It is included in the main ID ; #' \item \code{Yes.X} or \code{No.X}: data related to the pointer in \code{Yes} or \code{No} column ; -#' \item \code{Included}: \code{boolean} value which indicates if this feature has been pointed by a Yes branch (\code{True}) or a No branch (\code{False}). By convention stem feature is always included ; #' } #' #' @examples @@ -129,7 +128,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model qualityLeaf <- extract(leaf, "leaf=\\-*\\d*\\.*\\d*") coverBranch <- extract(branch, "cover=\\d*\\.*\\d*") coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*") - dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID][,"Included":=F][ID == yesBranch, Included:=T][1, Included:=T] + dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID] allTrees <- rbindlist(list(allTrees, dt), use.names = T, fill = F) } @@ -168,4 +167,4 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model # Avoid error messages during CRAN check. # The reason is that these variables are never declared # They are mainly column names inferred by Data.table... -globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence", "Included")) \ No newline at end of file +globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence")) \ No newline at end of file