multi trees

This commit is contained in:
pommedeterresautee 2015-05-17 15:16:28 +02:00
parent 3104f1f806
commit 947afd7eac
2 changed files with 45 additions and 2 deletions

View File

@ -96,13 +96,14 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
allTrees <- data.table()
anynumber_regex<-"[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
for(i in 1:n_round){
tree <- text[(position[i]+1):(position[i+1]-1)]
# avoid tree made of a leaf only (no split)
if(length(tree) <2) next
if(length(tree) < 2) next
treeID <- i-1

View File

@ -0,0 +1,42 @@
library(stringr)
library(data.table)
data(agaricus.train, package='xgboost')
#Both dataset are list with two items, a sparse matrix and labels
#(labels = outcome column which will be learned).
#Each column of the sparse Matrix is a feature in one hot encoding format.
train <- agaricus.train
bst <- xgboost(data = train$data, label = train$label, max.depth = 5,
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#agaricus.test$data@Dimnames[[2]] represents the column names of the sparse matrix.
tree.matrix <- xgb.model.dt.tree(agaricus.train$data@Dimnames[[2]], model = bst)
# first number of the path represents the tree, then the following numbers are related to the path to follow
# root init
root.nodes <- tree.matrix[str_detect(ID, "\\d+-0"), ID]
tree.matrix[ID == root.nodes, Abs.Position:=root.nodes]
precedent.nodes <- root.nodes
while(tree.matrix[,sum(is.na(Abs.Position))] > 0) {
yes.row.nodes <- tree.matrix[Abs.Position %in% precedent.nodes & !is.na(Yes)]
no.row.nodes <- tree.matrix[Abs.Position %in% precedent.nodes & !is.na(No)]
yes.nodes.abs.pos <- yes.row.nodes[, Abs.Position] %>% paste0("-0")
no.nodes.abs.pos <- no.row.nodes[, Abs.Position] %>% paste0("-1")
tree.matrix[ID == yes.row.nodes[, Yes], Abs.Position := yes.nodes.abs.pos]
tree.matrix[ID == no.row.nodes[, No], Abs.Position := no.nodes.abs.pos]
precedent.nodes <- c(yes.nodes.abs.pos, no.nodes.abs.pos)
}
tree.matrix