refactor dump function to adapt to the new possibilities of exporting a String
This commit is contained in:
parent
6fd8bbe71a
commit
3e1eea0eea
@ -32,5 +32,6 @@ importFrom(stringr,str_extract)
|
||||
importFrom(stringr,str_extract_all)
|
||||
importFrom(stringr,str_match)
|
||||
importFrom(stringr,str_replace)
|
||||
importFrom(stringr,str_replace_all)
|
||||
importFrom(stringr,str_split)
|
||||
importFrom(stringr,str_trim)
|
||||
|
||||
@ -2,8 +2,11 @@
|
||||
#'
|
||||
#' Save a xgboost model to text file. Could be parsed later.
|
||||
#'
|
||||
#' @importFrom magrittr %>%
|
||||
#' @importFrom stringr str_split
|
||||
#' @importFrom stringr str_replace_all
|
||||
#' @param model the model object.
|
||||
#' @param fname the name of the binary file.
|
||||
#' @param fname the name of the text file where to save the model. If not provided or set to \code{NULL} the function will return the model as a \code{character} vector.
|
||||
#' @param fmap feature map file representing the type of feature.
|
||||
#' Detailed description could be found at
|
||||
#' \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
|
||||
@ -15,6 +18,9 @@
|
||||
#' gain is the approximate loss function gain we get in each split;
|
||||
#' cover is the sum of second order gradient in each node.
|
||||
#'
|
||||
#' @return
|
||||
#' If \code{fname} is not provided or set to \code{NULL}, the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}.
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
@ -25,15 +31,17 @@
|
||||
#' xgb.dump(bst, 'xgb.model.dump')
|
||||
#' @export
|
||||
#'
|
||||
xgb.dump <- function(model, fname, fmap = "", with.stats=FALSE) {
|
||||
xgb.dump <- function(model, fname = NULL, fmap = "", with.stats=FALSE) {
|
||||
if (class(model) != "xgb.Booster") {
|
||||
stop("xgb.dump: first argument must be type xgb.Booster")
|
||||
}
|
||||
if (typeof(fname) != "character") {
|
||||
stop("xgb.dump: second argument must be type character")
|
||||
if (!class(fname) %in% c("character", "NULL")) {
|
||||
stop("xgb.dump: second argument must be type character if provided")
|
||||
}
|
||||
result <- .Call("XGBoosterDumpModel_R", model, fmap, as.integer(with.stats), PACKAGE = "xgboost")
|
||||
|
||||
if(is.null(fname)) return(str_split(result, "\n") %>% unlist %>% str_replace_all("\t"," ") %>% Filter(function(x) x != "", .))
|
||||
|
||||
writeLines(result, fname)
|
||||
#unlist(str_split(a, "\n"))==""
|
||||
return(TRUE)
|
||||
TRUE
|
||||
}
|
||||
|
||||
@ -4,12 +4,12 @@
|
||||
\alias{xgb.dump}
|
||||
\title{Save xgboost model to text file}
|
||||
\usage{
|
||||
xgb.dump(model, fname, fmap = "", with.stats = FALSE)
|
||||
xgb.dump(model, fname = NULL, fmap = "", with.stats = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{model}{the model object.}
|
||||
|
||||
\item{fname}{the name of the binary file.}
|
||||
\item{fname}{the name of the text file where to save the model. If not provided or set to \code{NULL} the function will return the model as a \code{character} vector.}
|
||||
|
||||
\item{fmap}{feature map file representing the type of feature.
|
||||
Detailed description could be found at
|
||||
@ -23,6 +23,9 @@ for example Format.}
|
||||
gain is the approximate loss function gain we get in each split;
|
||||
cover is the sum of second order gradient in each node.}
|
||||
}
|
||||
\value{
|
||||
If \code{fname} is not provided or set to \code{NULL}, the function will return the model as a \code{character} vector. Otherwise it will return \code{TRUE}.
|
||||
}
|
||||
\description{
|
||||
Save a xgboost model to text file. Could be parsed later.
|
||||
}
|
||||
|
||||
@ -27,7 +27,7 @@ Results are returned for both linear and tree models.
|
||||
There are 3 columns :
|
||||
\itemize{
|
||||
\item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump.
|
||||
\item \code{Gain} contribution of each feature to the model. For boosted tree model, each gain of each feature of each tree is taken into account, then average per feature to give a vision of the entire model. Highest percentage means most important feature regarding the \code{label} used for the training ;
|
||||
\item \code{Gain} contribution of each feature to the model. For boosted tree model, the gain of each feature in each tree is taken into account, then averaged per feature to give a vision of the entire model. A higher percentage means a more important feature for predicting the \code{label} used for the training ;
|
||||
\item \code{Cover} metric of the number of observations related to this feature (only available for tree models) ;
|
||||
\item \code{Weight} percentage representing the relative number of times a feature has been taken into trees. \code{Gain} should be preferred when searching for the most important feature. For boosted linear model, this column has no meaning.
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user