Add a new verbose parameter to print progress during the process (set to true by default to not change behavior of existing code) + source code refactoring
This commit is contained in:
parent
4d0d65837d
commit
cdea1685e5
@ -25,5 +25,4 @@ importFrom(stringr,str_extract)
|
||||
importFrom(stringr,str_extract_all)
|
||||
importFrom(stringr,str_match)
|
||||
importFrom(stringr,str_replace)
|
||||
importFrom(stringr,str_replace_all)
|
||||
importFrom(stringr,str_split)
|
||||
|
||||
@ -8,8 +8,8 @@
|
||||
#' @importFrom data.table :=
|
||||
#' @importFrom data.table rbindlist
|
||||
#' @importFrom stringr str_extract_all
|
||||
#' @importFrom stringr str_extract
|
||||
#' @importFrom stringr str_split
|
||||
#' @importFrom stringr str_replace_all
|
||||
#' @importFrom stringr str_replace
|
||||
#' @importFrom stringr str_match
|
||||
#'
|
||||
@ -31,7 +31,7 @@
|
||||
#' @param nrounds the max number of iterations
|
||||
#' @param nfold number of folds used
|
||||
#' @param label option field, when data is Matrix
|
||||
#' @param showsd boolean, whether show standard deviation of cross validation
|
||||
#' @param showsd \code{boolean}, whether show standard deviation of cross validation
|
||||
#' @param metrics, list of evaluation metrics to be used in corss validation,
|
||||
#' when it is not specified, the evaluation metric is chosen according to objective function.
|
||||
#' Possible options are:
|
||||
@ -49,9 +49,10 @@
|
||||
#' prediction and dtrain,
|
||||
#' @param missing Missing is only used when input is dense matrix, pick a float
|
||||
# value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
|
||||
#' @param verbose \code{boolean}, print the statistics during the process.
|
||||
#' @param ... other parameters to pass to \code{params}.
|
||||
#'
|
||||
#' @return a \code{data.table} with each mean and standard deviation stat for training set and test set.
|
||||
#' @return A \code{data.table} with each mean and standard deviation stat for training set and test set.
|
||||
#'
|
||||
#' @details
|
||||
#' This is the cross validation function for xgboost
|
||||
@ -66,10 +67,11 @@
|
||||
#' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
#' history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
|
||||
#' "max.depth"=3, "eta"=1, "objective"="binary:logistic")
|
||||
#' print(history)
|
||||
#' @export
|
||||
#'
|
||||
xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NULL,
|
||||
showsd = TRUE, metrics=list(), obj = NULL, feval = NULL, ...) {
|
||||
showsd = TRUE, metrics=list(), obj = NULL, feval = NULL, verbose = T,...) {
|
||||
if (typeof(params) != "list") {
|
||||
stop("xgb.cv: first argument params must be list")
|
||||
}
|
||||
@ -94,28 +96,24 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
||||
for (k in 1:nfold) {
|
||||
fd <- folds[[k]]
|
||||
succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)
|
||||
msg[[k]] <- strsplit(xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval),
|
||||
"\t")[[1]]
|
||||
msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
|
||||
}
|
||||
ret <- xgb.cv.aggcv(msg, showsd)
|
||||
history <- c(history, ret)
|
||||
cat(paste(ret, "\n", sep=""))
|
||||
if(verbose) paste(ret, "\n", sep="") %>% cat
|
||||
}
|
||||
|
||||
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace_all("-", ".")
|
||||
|
||||
colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
|
||||
colnamesMean <- paste(colnames, "mean")
|
||||
colnamesStd <- paste(colnames, "std")
|
||||
|
||||
colnames <- c()
|
||||
for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
|
||||
|
||||
type <- rep(x = "numeric", times = length(colnames))
|
||||
|
||||
dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
|
||||
|
||||
split = str_split(string = history, pattern = "\t")
|
||||
for(line in split){
|
||||
dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d.\\d*") %>% unlist %>% as.list %>% {vec <- .;rbindlist(list(dt, vec), use.names = F, fill = F)}
|
||||
}
|
||||
|
||||
for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d.\\d*") %>% unlist %>% as.list %>% {vec <- .; rbindlist(list(dt, vec), use.names = F, fill = F)}
|
||||
dt
|
||||
}
|
||||
@ -6,7 +6,7 @@
|
||||
\usage{
|
||||
xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
|
||||
missing = NULL, showsd = TRUE, metrics = list(), obj = NULL,
|
||||
feval = NULL, ...)
|
||||
feval = NULL, verbose = T, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{params}{the list of parameters. Commonly used ones are:
|
||||
@ -34,7 +34,7 @@ xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
|
||||
|
||||
\item{missing}{Missing is only used when input is dense matrix, pick a float}
|
||||
|
||||
\item{showsd}{boolean, whether show standard deviation of cross validation}
|
||||
\item{showsd}{\code{boolean}, whether show standard deviation of cross validation}
|
||||
|
||||
\item{metrics,}{list of evaluation metrics to be used in corss validation,
|
||||
when it is not specified, the evaluation metric is chosen according to objective function.
|
||||
@ -54,10 +54,12 @@ gradient with given prediction and dtrain,}
|
||||
\code{list(metric='metric-name', value='metric-value')} with given
|
||||
prediction and dtrain,}
|
||||
|
||||
\item{verbose}{\code{boolean}, print the statistics during the process.}
|
||||
|
||||
\item{...}{other parameters to pass to \code{params}.}
|
||||
}
|
||||
\value{
|
||||
a \code{data.table} with each mean and standard deviation stat for training set and test set.
|
||||
A \code{data.table} with each mean and standard deviation stat for training set and test set.
|
||||
}
|
||||
\description{
|
||||
The cross valudation function of xgboost
|
||||
@ -75,5 +77,6 @@ data(agaricus.train, package='xgboost')
|
||||
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
|
||||
history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
|
||||
"max.depth"=3, "eta"=1, "objective"="binary:logistic")
|
||||
print(history)
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user