From 271e8202a7b077f9a5afab25eca3d67471303878 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Fri, 3 Apr 2015 12:20:34 -0500 Subject: [PATCH] force xgb.cv to return numeric performance values instead of character; update its docs --- R-package/R/xgb.cv.R | 15 +++++++++++---- R-package/man/xgb.cv.Rd | 10 ++++++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 38f70b973..db2ecf103 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -50,12 +50,19 @@ #' @param feval custimized evaluation function. Returns #' \code{list(metric='metric-name', value='metric-value')} with given #' prediction and dtrain. -#' @param stratified \code{boolean}, whether the sampling of folds should be stratified by the values of labels in \code{data} +#' @param stratified \code{boolean}, whether sampling of folds should be stratified by the values of labels in \code{data} #' @param verbose \code{boolean}, print the statistics during the process #' @param ... other parameters to pass to \code{params}. #' -#' @return A \code{data.table} with each mean and standard deviation stat for training set and test set. -#' +#' @return +#' If \code{prediction = TRUE}, a list with the following elements is returned: +#' \itemize{ +#' \item \code{dt} a \code{data.table} with each mean and standard deviation stat for training set and test set +#' \item \code{pred} an array or matrix (for multiclass classification) with predictions for each CV-fold for the model having been trained on the data in all other folds. +#' } +#' +#' If \code{prediction = FALSE}, just a \code{data.table} with each mean and standard deviation stat for training set and test set is returned. +#' #' @details #' The original sample is randomly partitioned into \code{nfold} equal size subsamples. #' @@ -148,7 +155,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table split <- str_split(string = history, pattern = "\t") - for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.list %>% {vec <- .; rbindlist(list(dt, vec), use.names = F, fill = F)} + for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)} if (prediction) { return(list(dt = dt,pred = predictValues)) diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index 6f8ee06cb..20423f76a 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -57,14 +57,20 @@ gradient with given prediction and dtrain.} \code{list(metric='metric-name', value='metric-value')} with given prediction and dtrain.} -\item{stratified}{\code{boolean}, whether the sampling of folds should be stratified by the values of labels in \code{data}} +\item{stratified}{\code{boolean}, whether sampling of folds should be stratified by the values of labels in \code{data}} \item{verbose}{\code{boolean}, print the statistics during the process} \item{...}{other parameters to pass to \code{params}.} } \value{ -A \code{data.table} with each mean and standard deviation stat for training set and test set. +If \code{prediction = TRUE}, a list with the following elements is returned: +\itemize{ + \item \code{dt} a \code{data.table} with each mean and standard deviation stat for training set and test set + \item \code{pred} an array or matrix (for multiclass classification) with predictions for each CV-fold for the model having been trained on the data in all other folds. +} + +If \code{prediction = FALSE}, just a \code{data.table} with each mean and standard deviation stat for training set and test set is returned. } \description{ The cross valudation function of xgboost