refactor dump function to adapt to the new possibilities of exporting a String

2015-01-09 00:14:01 +01:00 · 2015-01-09 00:14:01 +01:00 · 3e1eea0eea
commit 3e1eea0eea
parent 6fd8bbe71a
4 changed files with 21 additions and 9 deletions
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@ -32,5 +32,6 @@ importFrom(stringr,str_extract)
 importFrom(stringr,str_extract_all)
 importFrom(stringr,str_match)
 importFrom(stringr,str_replace)
+importFrom(stringr,str_replace_all)
 importFrom(stringr,str_split)
 importFrom(stringr,str_trim)
--- a/R-package/R/xgb.dump.R
+++ b/R-package/R/xgb.dump.R
@ -2,8 +2,11 @@
 #' 
 #' Save a xgboost model to text file. Could be parsed later.
 #' 
+#' @importFrom magrittr %>%
+#' @importFrom stringr str_split
+#' @importFrom stringr str_replace_all
 #' @param model the model object.
-#' @param fname the name of the binary file.
+#' @param fname the name of the text file in which to save the model. If not provided or set to \code{NULL}, the function returns the model as a \code{character} vector instead of writing a file.
 #' @param fmap feature map file representing the type of feature. 
 #'        Detailed description could be found at 
 #'        \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
@ -15,6 +18,9 @@
 #'        gain is the approximate loss function gain we get in each split;
 #'        cover is the sum of second order gradient in each node.
 #'
+#' @return
+#' If \code{fname} is not provided or set to \code{NULL}, the function returns the model dump as a \code{character} vector containing its non-empty lines. Otherwise it writes the dump to \code{fname} and returns \code{TRUE}.
+#'
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' data(agaricus.test, package='xgboost')
@ -25,15 +31,17 @@
 #' xgb.dump(bst, 'xgb.model.dump')
 #' @export
 #' 
-xgb.dump <- function(model, fname, fmap = "", with.stats=FALSE) {
+xgb.dump <- function(model, fname = NULL, fmap = "", with.stats=FALSE) {
  if (class(model) != "xgb.Booster") {
    stop("xgb.dump: first argument must be type xgb.Booster")
  }
-  if (typeof(fname) != "character") {
-    stop("xgb.dump: second argument must be type character")
+  if (!class(fname) %in% c("character", "NULL")) {
+    stop("xgb.dump: second argument must be type character if provided")
  }
  result <- .Call("XGBoosterDumpModel_R", model, fmap, as.integer(with.stats), PACKAGE = "xgboost")
+  
+  if(is.null(fname)) return(str_split(result, "\n") %>% unlist %>% str_replace_all("\t","  ") %>% Filter(function(x) x != "", .))
+  
  writeLines(result, fname)
-  #unlist(str_split(a, "\n"))==""
-  return(TRUE)
+  TRUE
 } 
--- a/R-package/man/xgb.dump.Rd
+++ b/R-package/man/xgb.dump.Rd
@ -4,12 +4,12 @@
 \alias{xgb.dump}
 \title{Save xgboost model to text file}
 \usage{
-xgb.dump(model, fname, fmap = "", with.stats = FALSE)
+xgb.dump(model, fname = NULL, fmap = "", with.stats = FALSE)
 }
 \arguments{
 \item{model}{the model object.}

-\item{fname}{the name of the binary file.}
+\item{fname}{the name of the text file in which to save the model. If not provided or set to \code{NULL}, the function returns the model as a \code{character} vector instead of writing a file.}

 \item{fmap}{feature map file representing the type of feature.
 Detailed description could be found at
@ -23,6 +23,9 @@ for example Format.}
       gain is the approximate loss function gain we get in each split;
       cover is the sum of second order gradient in each node.}
 }
+\value{
+If \code{fname} is not provided or set to \code{NULL}, the function returns the model dump as a \code{character} vector containing its non-empty lines. Otherwise it writes the dump to \code{fname} and returns \code{TRUE}.
+}
 \description{
 Save a xgboost model to text file. Could be parsed later.
 }
--- a/R-package/man/xgb.importance.Rd
+++ b/R-package/man/xgb.importance.Rd
@ -27,7 +27,7 @@ Results are returned for both linear and tree models.
 There are 3 columns :
 \itemize{
  \item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump.
-  \item \code{Gain} contribution of each feature to the model. For boosted tree model, each gain of each feature of each tree is taken into account, then average per feature to give a vision of the entire model. Highest percentage means most important feature regarding the \code{label} used for the training ;
+  \item \code{Gain} contribution of each feature to the model. For a boosted tree model, the gain of each feature in each tree is taken into account, then averaged per feature to give an overview of the entire model. The highest percentage indicates the most important feature for predicting the \code{label} used for training ;
  \item \code{Cover} metric of the number of observation related to this feature (only available for tree models) ;
   \item \code{Weight} percentage representing the relative number of times a feature has been used in trees. \code{Gain} should be preferred when searching for the most important feature. For a boosted linear model, this column has no meaning.
 }