plot all trees

This commit is contained in:
El Potaeto 2015-01-04 21:56:41 +01:00
parent 33bb168574
commit f6290ad792

View File

@@ -7,41 +7,51 @@ position <- str_match(text, "booster") %>% is.na %>% not %>% which %>% c(length(
extract <- function(x, pattern) str_extract(x, pattern) %>% str_split("=") %>% lapply(function(x) x[2] %>% as.numeric) %>% unlist extract <- function(x, pattern) str_extract(x, pattern) %>% str_split("=") %>% lapply(function(x) x[2] %>% as.numeric) %>% unlist
#for(i in 1:(length(position)-1)){ addTreeId <- function(x, i) paste(i,x,sep = "-")
i=1
cat(paste("\n",i,"\n")) allTrees <- data.table()
for(i in 1:(length(position)-1)){
tree <- text[(position[i]+1):(position[i+1]-1)] tree <- text[(position[i]+1):(position[i+1]-1)]
paste(tree, collapse = "\n") %>% cat
notLeaf <- str_match(tree, "leaf") %>% is.na notLeaf <- str_match(tree, "leaf") %>% is.na
leaf <- notLeaf %>% not %>% tree[.] leaf <- notLeaf %>% not %>% tree[.]
branch <- notLeaf %>% tree[.] branch <- notLeaf %>% tree[.]
idBranch <- str_extract(branch, "\\d*:") %>% str_replace(":", "") %>% as.numeric idBranch <- str_extract(branch, "\\d*:") %>% str_replace(":", "") %>% addTreeId(i)
idLeaf <- str_extract(leaf, "\\d*:") %>% str_replace(":", "") %>% as.numeric idLeaf <- str_extract(leaf, "\\d*:") %>% str_replace(":", "") %>% addTreeId(i)
featureBranch <- str_extract(branch, "f\\d*<") %>% str_replace("<", "") #%>% as.numeric featureBranch <- str_extract(branch, "f\\d*<") %>% str_replace("<", "") %>% str_replace("f", "") %>% as.numeric
featureLeaf <- rep("Leaf", length(leaf)) featureLeaf <- rep("Leaf", length(leaf))
yesBranch <- extract(branch, "yes=\\d*") splitBranch <- str_extract(branch, "<\\d*\\.*\\d*\\]") %>% str_replace("<", "") %>% str_replace("\\]", "")
splitLeaf <- rep(NA, length(leaf))
yesBranch <- extract(branch, "yes=\\d*") %>% addTreeId(i)
yesLeaf <- rep(NA, length(leaf)) yesLeaf <- rep(NA, length(leaf))
noBranch <- extract(branch, "no=\\d*") noBranch <- extract(branch, "no=\\d*") %>% addTreeId(i)
noLeaf <- rep(NA, length(leaf)) noLeaf <- rep(NA, length(leaf))
missingBranch <- extract(branch, "missing=\\d+") missingBranch <- extract(branch, "missing=\\d+") %>% addTreeId(i)
missingLeaf <- rep(NA, length(leaf)) missingLeaf <- rep(NA, length(leaf))
qualityBranch <- extract(branch, "gain=\\d*\\.*\\d*") qualityBranch <- extract(branch, "gain=\\d*\\.*\\d*")
qualityLeaf <- extract(leaf, "leaf=\\-*\\d*\\.*\\d*") qualityLeaf <- extract(leaf, "leaf=\\-*\\d*\\.*\\d*")
coverBranch <- extract(branch, "cover=\\d*\\.*\\d*") coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*") coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=i] dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=i]
set(dt, j = "YesFeature", value = ifelse(is.na(dt[,Yes]),NA,dt[ID == dt[,Yes], ID])) set(dt, i = which(dt[,Feature]!= "Leaf"), j = "YesFeature", value = dt[ID == dt[,Yes], Feature])
set(dt, j = "NoFeature", value = ifelse(is.na(dt[,No]),NA,dt[ID == dt[,No], ID])) set(dt, i = which(dt[,Feature]!= "Leaf"), j = "NoFeature", value = dt[ID == dt[,No], Feature])
dtBranch <- dt[Feature!="Leaf"]
yesPath <- paste(dtBranch[,ID], "-->", dtBranch[,Yes], sep = "") dt[Feature!="Leaf" ,yesPath:= paste(ID,"[", Feature, "]-->|< ", Split, "|", Yes, "[", YesFeature, "]", sep = "")]
noPath <- paste(dtBranch[,ID], "-->", dtBranch[,No], sep = "")
missingPath <- paste(dtBranch[,ID], "-->|Missing|", dtBranch[,Missing], sep = "")
yesPathStyle <- paste("style ", dtBranch[,Yes], " fill:#A2EB86, stroke:#04C4AB, stroke-width:2px", sep = "")
noPathStyle <- paste("style ", dtBranch[,No], " fill:#FFA070, stroke:#FF5E5E, stroke-width:2px", sep = "")
path <- c(yesPath, noPath, yesPathStyle, noPathStyle) %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "",sep = ";") dt[Feature!="Leaf" ,noPath:= paste(ID,"[", Feature, "]-->|> ", Split, "|", No, "[", NoFeature, "]", sep = "")]
DiagrammeR(path, height = 400) #missingPath <- paste(dtBranch[,ID], "-->|Missing|", dtBranch[,Missing], sep = "")
dt[Feature!="Leaf", yesPathStyle := paste("style ", Yes, " fill:#A2EB86, stroke:#04C4AB, stroke-width:2px", sep = "")]
dt[Feature!="Leaf", noPathStyle := paste("style ", No, " fill:#FFA070, stroke:#FF5E5E, stroke-width:2px", sep = "")]
allTrees <- rbindlist(list(allTrees, dt), use.names = T, fill = F)
}
path <- dt[Feature!="Leaf", c(yesPath, noPath, yesPathStyle, noPathStyle)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "",sep = ";")
DiagrammeR(path, height =700)
#} #}