parse history first line to guess which columns are required

This commit is contained in:
El Potaeto 2015-01-01 22:43:23 +01:00
parent 8bbe45eed2
commit 4d0d65837d
2 changed files with 19 additions and 5 deletions

View File

@ -17,9 +17,13 @@ import(methods)
importClassesFrom(Matrix,dgCMatrix) importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgeMatrix) importClassesFrom(Matrix,dgeMatrix)
importFrom(data.table,":=") importFrom(data.table,":=")
importFrom(data.table,as.data.table)
importFrom(data.table,data.table) importFrom(data.table,data.table)
importFrom(data.table,rbindlist) importFrom(data.table,rbindlist)
importFrom(magrittr,"%>%") importFrom(magrittr,"%>%")
importFrom(stringr,str_extract) importFrom(stringr,str_extract)
importFrom(stringr,str_extract_all) importFrom(stringr,str_extract_all)
importFrom(stringr,str_match)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_split) importFrom(stringr,str_split)

View File

@ -3,11 +3,15 @@
#' The cross validation function of xgboost #' The cross validation function of xgboost
#' #'
#' @importFrom data.table data.table #' @importFrom data.table data.table
#' @importFrom data.table as.data.table
#' @importFrom magrittr %>% #' @importFrom magrittr %>%
#' @importFrom data.table := #' @importFrom data.table :=
#' @importFrom data.table rbindlist #' @importFrom data.table rbindlist
#' @importFrom stringr str_extract_all #' @importFrom stringr str_extract_all
#' @importFrom stringr str_split #' @importFrom stringr str_split
#' @importFrom stringr str_replace_all
#' @importFrom stringr str_replace
#' @importFrom stringr str_match
#' #'
#' @param params the list of parameters. Commonly used ones are: #' @param params the list of parameters. Commonly used ones are:
#' \itemize{ #' \itemize{
@ -98,14 +102,20 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
cat(paste(ret, "\n", sep="")) cat(paste(ret, "\n", sep=""))
} }
dt <- data.table(train_rmse_mean=numeric(), train_rmse_std=numeric(), train_auc_mean=numeric(), train_auc_std=numeric(), test_rmse_mean=numeric(), test_rmse_std=numeric(), test_auc_mean=numeric(), test_auc_std=numeric()) colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace_all("-", ".")
colnamesMean <- paste(colnames, "mean")
colnamesStd <- paste(colnames, "std")
colnames <- c()
for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
type <- rep(x = "numeric", times = length(colnames))
dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
split = str_split(string = history, pattern = "\t") split = str_split(string = history, pattern = "\t")
for(line in split){ for(line in split){
dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d.\\d*") %>% unlist %>% as.list %>% {vec <- .;rbindlist(list(dt, vec), use.names = F, fill = F)} dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d.\\d*") %>% unlist %>% as.list %>% {vec <- .;rbindlist(list(dt, vec), use.names = F, fill = F)}
} }
dt dt
} }