From cdea1685e581c081b72a7107bb6fb899fa6c5c2f Mon Sep 17 00:00:00 2001
From: El Potaeto <pommedeterresautee@msn.com>
Date: Fri, 2 Jan 2015 11:21:53 +0100
Subject: [PATCH] Add a new verbose parameter to print progress during the
 process (set to true by default to not change behavior of existing code) +
 source code refactoring

---
 R-package/NAMESPACE     |  1 -
 R-package/R/xgb.cv.R    | 26 ++++++++++++--------------
 R-package/man/xgb.cv.Rd |  9 ++++++---
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index bd12fc7ec..6e74d9ac2 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -25,5 +25,4 @@ importFrom(stringr,str_extract)
 importFrom(stringr,str_extract_all)
 importFrom(stringr,str_match)
 importFrom(stringr,str_replace)
-importFrom(stringr,str_replace_all)
 importFrom(stringr,str_split)
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index c2e73e202..7256980c6 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -8,8 +8,8 @@
 #' @importFrom data.table :=
 #' @importFrom data.table rbindlist
 #' @importFrom stringr str_extract_all
+#' @importFrom stringr str_extract
 #' @importFrom stringr str_split
-#' @importFrom stringr str_replace_all
 #' @importFrom stringr str_replace
 #' @importFrom stringr str_match
 #' 
@@ -31,7 +31,7 @@
 #' @param nrounds the max number of iterations
 #' @param nfold number of folds used
 #' @param label option field, when data is Matrix
-#' @param showsd boolean, whether show standard deviation of cross validation
+#' @param showsd \code{boolean}, whether show standard deviation of cross validation
 #' @param metrics, list of evaluation metrics to be used in corss validation,
 #'   when it is not specified, the evaluation metric is chosen according to objective function.
 #'   Possible options are:
@@ -49,9 +49,10 @@
 #'   prediction and dtrain,
 #' @param missing Missing is only used when input is dense matrix, pick a float
 #     value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
+#' @param verbose \code{boolean}, print the statistics during the process.
 #' @param ... other parameters to pass to \code{params}.
 #' 
-#' @return a \code{data.table} with each mean and standard deviation stat for training set and test set.
+#' @return A \code{data.table} with each mean and standard deviation stat for training set and test set.
 #' 
 #' @details 
 #' This is the cross validation function for xgboost
@@ -66,10 +67,11 @@
 #' dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 #' history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
 #'                   "max.depth"=3, "eta"=1, "objective"="binary:logistic")
+#' print(history)
 #' @export
 #'
 xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NULL, 
-                   showsd = TRUE, metrics=list(), obj = NULL, feval = NULL, ...) {
+                   showsd = TRUE, metrics=list(), obj = NULL, feval = NULL, verbose = T,...) {
   if (typeof(params) != "list") {
     stop("xgb.cv: first argument params must be list")
   }
@@ -94,28 +96,24 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
     for (k in 1:nfold) {
       fd <- folds[[k]]
       succ <- xgb.iter.update(fd$booster, fd$dtrain, i - 1, obj)      
-      msg[[k]] <- strsplit(xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval), 
-                           "\t")[[1]]
+      msg[[k]] <- xgb.iter.eval(fd$booster, fd$watchlist, i - 1, feval) %>% str_split("\t") %>% .[[1]]
     }
     ret <- xgb.cv.aggcv(msg, showsd)
     history <- c(history, ret)
-    cat(paste(ret, "\n", sep=""))
+    if(verbose) paste(ret, "\n", sep="") %>% cat
   }
   
-  colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace_all("-", ".")
-  
+  colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
   colnamesMean <- paste(colnames, "mean")
   colnamesStd <- paste(colnames, "std")
+  
   colnames <- c()
   for(i in 1:length(colnamesMean)) colnames <- c(colnames, colnamesMean[i], colnamesStd[i])
   
   type <- rep(x = "numeric", times = length(colnames))
-  
   dt <- read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
-  
   split = str_split(string = history, pattern = "\t")
-  for(line in split){
-    dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d.\\d*") %>% unlist %>% as.list %>% {vec <- .;rbindlist(list(dt, vec), use.names = F, fill = F)}
-  }
+  
+  for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d.\\d*") %>% unlist %>% as.list %>% {vec <- .; rbindlist(list(dt, vec), use.names = F, fill = F)}
   dt
 }
\ No newline at end of file
diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd
index 19f04ee79..7ba5eb727 100644
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -6,7 +6,7 @@
 \usage{
 xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
   missing = NULL, showsd = TRUE, metrics = list(), obj = NULL,
-  feval = NULL, ...)
+  feval = NULL, verbose = T, ...)
 }
 \arguments{
 \item{params}{the list of parameters. Commonly used ones are:
@@ -34,7 +34,7 @@ xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
 
 \item{missing}{Missing is only used when input is dense matrix, pick a float}
 
-\item{showsd}{boolean, whether show standard deviation of cross validation}
+\item{showsd}{\code{boolean}, whether show standard deviation of cross validation}
 
 \item{metrics,}{list of evaluation metrics to be used in corss validation,
   when it is not specified, the evaluation metric is chosen according to objective function.
@@ -54,10 +54,12 @@ gradient with given prediction and dtrain,}
 \code{list(metric='metric-name', value='metric-value')} with given
 prediction and dtrain,}
 
+\item{verbose}{\code{boolean}, print the statistics during the process.}
+
 \item{...}{other parameters to pass to \code{params}.}
 }
 \value{
-a \code{data.table} with each mean and standard deviation stat for training set and test set.
+A \code{data.table} with each mean and standard deviation stat for training set and test set.
 }
 \description{
 The cross valudation function of xgboost
@@ -75,5 +77,6 @@ data(agaricus.train, package='xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 history <- xgb.cv(data = dtrain, nround=3, nfold = 5, metrics=list("rmse","auc"),
                   "max.depth"=3, "eta"=1, "objective"="binary:logistic")
+print(history)
 }