Merge pull request #225 from chrissly31415/master
Fixing parsing of model dump text file in R
This commit is contained in:
commit
0ea28c35c4
@@ -95,7 +95,8 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
|
|||||||
addTreeId <- function(x, i) paste(i,x,sep = "-")
|
addTreeId <- function(x, i) paste(i,x,sep = "-")
|
||||||
|
|
||||||
allTrees <- data.table()
|
allTrees <- data.table()
|
||||||
|
|
||||||
|
anynumber_regex<-"[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
|
||||||
for(i in 1:n_round){
|
for(i in 1:n_round){
|
||||||
|
|
||||||
tree <- text[(position[i]+1):(position[i+1]-1)]
|
tree <- text[(position[i]+1):(position[i+1]-1)]
|
||||||
@@ -115,7 +116,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
|
|||||||
featureBranch <- feature_names[featureBranch + 1]
|
featureBranch <- feature_names[featureBranch + 1]
|
||||||
}
|
}
|
||||||
featureLeaf <- rep("Leaf", length(leaf))
|
featureLeaf <- rep("Leaf", length(leaf))
|
||||||
splitBranch <- str_extract(branch, "<\\d*\\.*\\d*\\]") %>% str_replace("<", "") %>% str_replace("\\]", "")
|
splitBranch <- str_extract(branch, paste0("<",anynumber_regex,"\\]")) %>% str_replace("<", "") %>% str_replace("\\]", "")
|
||||||
splitLeaf <- rep(NA, length(leaf))
|
splitLeaf <- rep(NA, length(leaf))
|
||||||
yesBranch <- extract(branch, "yes=\\d*") %>% addTreeId(treeID)
|
yesBranch <- extract(branch, "yes=\\d*") %>% addTreeId(treeID)
|
||||||
yesLeaf <- rep(NA, length(leaf))
|
yesLeaf <- rep(NA, length(leaf))
|
||||||
@@ -123,8 +124,8 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
|
|||||||
noLeaf <- rep(NA, length(leaf))
|
noLeaf <- rep(NA, length(leaf))
|
||||||
missingBranch <- extract(branch, "missing=\\d+") %>% addTreeId(treeID)
|
missingBranch <- extract(branch, "missing=\\d+") %>% addTreeId(treeID)
|
||||||
missingLeaf <- rep(NA, length(leaf))
|
missingLeaf <- rep(NA, length(leaf))
|
||||||
qualityBranch <- extract(branch, "gain=\\d*\\.*\\d*")
|
qualityBranch <- extract(branch, paste0("gain=",anynumber_regex))
|
||||||
qualityLeaf <- extract(leaf, "leaf=\\-*\\d*\\.*\\d*")
|
qualityLeaf <- extract(leaf, paste0("leaf=",anynumber_regex))
|
||||||
coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
|
coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
|
||||||
coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
|
coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
|
||||||
dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID]
|
dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user