diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 23de90d28..a36e066ef 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -32,5 +32,6 @@ importFrom(stringr,str_extract) importFrom(stringr,str_extract_all) importFrom(stringr,str_match) importFrom(stringr,str_replace) +importFrom(stringr,str_replace_all) importFrom(stringr,str_split) importFrom(stringr,str_trim) diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index 9a6e0ddd0..0d7e79f31 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -2,8 +2,11 @@ #' #' Save a xgboost model to text file. Could be parsed later. #' +#' @importFrom magrittr %>% +#' @importFrom stringr str_split +#' @importFrom stringr str_replace_all #' @param model the model object. -#' @param fname the name of the binary file. +#' @param fname the name of the text file where to save the model. If not provided or set to \code{NULL} the function will return the model as a \code{character} vector. #' @param fmap feature map file representing the type of feature. #' Detailed description could be found at #' \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}. @@ -15,6 +18,9 @@ #' gain is the approximate loss function gain we get in each split; #' cover is the sum of second order gradient in each node. #' +#' @return +#' if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}. 
+#' #' @examples #' data(agaricus.train, package='xgboost') #' data(agaricus.test, package='xgboost') @@ -25,15 +31,17 @@ #' xgb.dump(bst, 'xgb.model.dump') #' @export #' -xgb.dump <- function(model, fname, fmap = "", with.stats=FALSE) { +xgb.dump <- function(model, fname = NULL, fmap = "", with.stats=FALSE) { if (class(model) != "xgb.Booster") { stop("xgb.dump: first argument must be type xgb.Booster") } - if (typeof(fname) != "character") { - stop("xgb.dump: second argument must be type character") + if (!class(fname) %in% c("character", "NULL")) { + stop("xgb.dump: second argument must be type character if provided") } result <- .Call("XGBoosterDumpModel_R", model, fmap, as.integer(with.stats), PACKAGE = "xgboost") + + if(is.null(fname)) return(str_split(result, "\n") %>% unlist %>% str_replace_all("\t"," ") %>% Filter(function(x) x != "", .)) + writeLines(result, fname) - #unlist(str_split(a, "\n"))=="" - return(TRUE) + TRUE } diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index bcecc6abd..e779f32b9 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -4,12 +4,12 @@ \alias{xgb.dump} \title{Save xgboost model to text file} \usage{ -xgb.dump(model, fname, fmap = "", with.stats = FALSE) +xgb.dump(model, fname = NULL, fmap = "", with.stats = FALSE) } \arguments{ \item{model}{the model object.} -\item{fname}{the name of the binary file.} +\item{fname}{the name of the text file where to save the model. If not provided or set to \code{NULL} the function will return the model as a \code{character} vector.} \item{fmap}{feature map file representing the type of feature. Detailed description could be found at @@ -23,6 +23,9 @@ for example Format.} gain is the approximate loss function gain we get in each split; cover is the sum of second order gradient in each node.} } +\value{ +if fname is not provided or set to \code{NULL} the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}. 
+} \description{ Save a xgboost model to text file. Could be parsed later. } diff --git a/R-package/man/xgb.importance.Rd b/R-package/man/xgb.importance.Rd index 8aa58cddd..78be4b91b 100644 --- a/R-package/man/xgb.importance.Rd +++ b/R-package/man/xgb.importance.Rd @@ -27,7 +27,7 @@ Results are returned for both linear and tree models. There are 3 columns : \itemize{ \item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump. - \item \code{Gain} contribution of each feature to the model. For boosted tree model, each gain of each feature of each tree is taken into account, then average per feature to give a vision of the entire model. Highest percentage means most important feature regarding the \code{label} used for the training ; + \item \code{Gain} contribution of each feature to the model. For a boosted tree model, the gain of each feature in each tree is taken into account, then averaged per feature to give a view of the entire model. A higher percentage means a more important feature for predicting the \code{label} used for training ; \item \code{Cover} metric of the number of observation related to this feature (only available for tree models) ; \item \code{Weight} percentage representing the relative number of times a feature have been taken into trees. \code{Gain} should be prefered to search the most important feature. For boosted linear model, this column has no meaning. }