return history as data.table for cross validation + documentation

This commit is contained in:
El Potaeto 2015-01-01 16:05:43 +01:00
parent 34aaeff3d9
commit a524a51a06
3 changed files with 22 additions and 5 deletions

View File

@ -18,5 +18,6 @@ importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgeMatrix)
importFrom(data.table,":=")
importFrom(data.table,data.table)
importFrom(data.table,rbindlist)
importFrom(magrittr,"%>%")
importFrom(stringr,str_extract)

View File

@ -2,6 +2,11 @@
#'
#' The cross validation function of xgboost
#'
#' @importFrom data.table data.table
#' @importFrom magrittr %>%
#' @importFrom data.table :=
#' @importFrom data.table rbindlist
#' @importFrom stringr str_extract
#' @param params the list of parameters. Commonly used ones are:
#' \itemize{
#' \item \code{objective} objective function, common ones are
@ -40,6 +45,8 @@
# value that represents a missing value. Sometimes a dataset uses 0 or another extreme value to represent missing values.
#' @param ... other parameters to pass to \code{params}.
#'
#' @return a \code{data.table} with the mean and standard deviation of each evaluation statistic for the training set and the test set.
#'
#' @details
#' This is the cross validation function for xgboost
#'
@ -88,9 +95,15 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
history <- c(history, ret)
cat(paste(ret, "\n", sep=""))
}
return (history)
dt <- data.table(train_rmse_mean=numeric(), train_rmse_std=numeric(), train_auc_mean=numeric(), train_auc_std=numeric(), test_rmse_mean=numeric(), test_rmse_std=numeric(), test_auc_mean=numeric(), test_auc_std=numeric())
split = str_split(string = history, pattern = "\t")
for(line in split){
dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d.\\d*") %>% unlist %>% as.list %>% {vec <- .;rbindlist(list(dt, vec), use.names = F, fill = F)}
}
dt
}
# Extract the numeric values that follow the last "test-<name>:" marker in an
# evaluation string. The text after the colon is split on "+" and each piece
# is converted to a number. Only the first element of `x` is examined.
xgb.cv.strip.numeric <- function(x) {
  # regexec() yields the full match followed by the two capture groups.
  captured <- regmatches(x, regexec("test-(.*):(.*)$", x))
  # Position 3 is the second capture group: everything after the colon.
  value_text <- captured[[1]][3]
  pieces <- strsplit(value_text, "\\+")
  as.numeric(pieces[[1]])
}

View File

@ -56,6 +56,9 @@ prediction and dtrain,}
\item{...}{other parameters to pass to \code{params}.}
}
\value{
a \code{data.table} with the mean and standard deviation of each evaluation statistic for the training set and the test set.
}
\description{
The cross validation function of xgboost
}