refactor dump function to adapt to the new possibilities of exporting a String
This commit is contained in:
parent
6fd8bbe71a
commit
3e1eea0eea
@ -32,5 +32,6 @@ importFrom(stringr,str_extract)
|
|||||||
importFrom(stringr,str_extract_all)
|
importFrom(stringr,str_extract_all)
|
||||||
importFrom(stringr,str_match)
|
importFrom(stringr,str_match)
|
||||||
importFrom(stringr,str_replace)
|
importFrom(stringr,str_replace)
|
||||||
|
importFrom(stringr,str_replace_all)
|
||||||
importFrom(stringr,str_split)
|
importFrom(stringr,str_split)
|
||||||
importFrom(stringr,str_trim)
|
importFrom(stringr,str_trim)
|
||||||
|
|||||||
@ -2,8 +2,11 @@
|
|||||||
#'
|
#'
|
||||||
#' Save a xgboost model to text file. Could be parsed later.
|
#' Save a xgboost model to text file. Could be parsed later.
|
||||||
#'
|
#'
|
||||||
|
#' @importFrom magrittr %>%
|
||||||
|
#' @importFrom stringr str_split
|
||||||
|
#' @importFrom stringr str_replace_all
|
||||||
#' @param model the model object.
|
#' @param model the model object.
|
||||||
#' @param fname the name of the binary file.
|
#' @param fname the name of the text file in which to save the model. If not provided or set to \code{NULL}, the function returns the model as a \code{character} vector.
|
||||||
#' @param fmap feature map file representing the type of feature.
|
#' @param fmap feature map file representing the type of feature.
|
||||||
#' Detailed description could be found at
|
#' Detailed description could be found at
|
||||||
#' \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
|
#' \url{https://github.com/tqchen/xgboost/wiki/Binary-Classification#dump-model}.
|
||||||
@ -15,6 +18,9 @@
|
|||||||
#' gain is the approximate loss function gain we get in each split;
|
#' gain is the approximate loss function gain we get in each split;
|
||||||
#' cover is the sum of second order gradient in each node.
|
#' cover is the sum of second order gradient in each node.
|
||||||
#'
|
#'
|
||||||
|
#' @return
|
||||||
|
#' If \code{fname} is not provided or is set to \code{NULL}, the function returns the model as a \code{character} vector. Otherwise it returns \code{TRUE}.
|
||||||
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
#' data(agaricus.test, package='xgboost')
|
#' data(agaricus.test, package='xgboost')
|
||||||
@ -25,15 +31,17 @@
|
|||||||
#' xgb.dump(bst, 'xgb.model.dump')
|
#' xgb.dump(bst, 'xgb.model.dump')
|
||||||
#' @export
|
#' @export
|
||||||
#'
|
#'
|
||||||
xgb.dump <- function(model, fname, fmap = "", with.stats=FALSE) {
|
xgb.dump <- function(model, fname = NULL, fmap = "", with.stats=FALSE) {
|
||||||
if (class(model) != "xgb.Booster") {
|
if (class(model) != "xgb.Booster") {
|
||||||
stop("xgb.dump: first argument must be type xgb.Booster")
|
stop("xgb.dump: first argument must be type xgb.Booster")
|
||||||
}
|
}
|
||||||
if (typeof(fname) != "character") {
|
if (!class(fname) %in% c("character", "NULL")) {
|
||||||
stop("xgb.dump: second argument must be type character")
|
stop("xgb.dump: second argument must be type character if provided")
|
||||||
}
|
}
|
||||||
result <- .Call("XGBoosterDumpModel_R", model, fmap, as.integer(with.stats), PACKAGE = "xgboost")
|
result <- .Call("XGBoosterDumpModel_R", model, fmap, as.integer(with.stats), PACKAGE = "xgboost")
|
||||||
|
|
||||||
|
if(is.null(fname)) return(str_split(result, "\n") %>% unlist %>% str_replace_all("\t"," ") %>% Filter(function(x) x != "", .))
|
||||||
|
|
||||||
writeLines(result, fname)
|
writeLines(result, fname)
|
||||||
#unlist(str_split(a, "\n"))==""
|
TRUE
|
||||||
return(TRUE)
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -4,12 +4,12 @@
|
|||||||
\alias{xgb.dump}
|
\alias{xgb.dump}
|
||||||
\title{Save xgboost model to text file}
|
\title{Save xgboost model to text file}
|
||||||
\usage{
|
\usage{
|
||||||
xgb.dump(model, fname, fmap = "", with.stats = FALSE)
|
xgb.dump(model, fname = NULL, fmap = "", with.stats = FALSE)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{model}{the model object.}
|
\item{model}{the model object.}
|
||||||
|
|
||||||
\item{fname}{the name of the binary file.}
|
\item{fname}{the name of the text file in which to save the model. If not provided or set to \code{NULL}, the function returns the model as a \code{character} vector.}
|
||||||
|
|
||||||
\item{fmap}{feature map file representing the type of feature.
|
\item{fmap}{feature map file representing the type of feature.
|
||||||
Detailed description could be found at
|
Detailed description could be found at
|
||||||
@ -23,6 +23,9 @@ for example Format.}
|
|||||||
gain is the approximate loss function gain we get in each split;
|
gain is the approximate loss function gain we get in each split;
|
||||||
cover is the sum of second order gradient in each node.}
|
cover is the sum of second order gradient in each node.}
|
||||||
}
|
}
|
||||||
|
\value{
|
||||||
|
If \code{fname} is not provided or is set to \code{NULL}, the function returns the model as a \code{character} vector. Otherwise it returns \code{TRUE}.
|
||||||
|
}
|
||||||
\description{
|
\description{
|
||||||
Save a xgboost model to text file. Could be parsed later.
|
Save a xgboost model to text file. Could be parsed later.
|
||||||
}
|
}
|
||||||
|
|||||||
@ -27,7 +27,7 @@ Results are returned for both linear and tree models.
|
|||||||
There are 3 columns :
|
There are 3 columns :
|
||||||
\itemize{
|
\itemize{
|
||||||
\item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump.
|
\item \code{Features} name of the features as provided in \code{feature_names} or already present in the model dump.
|
||||||
\item \code{Gain} contribution of each feature to the model. For boosted tree model, each gain of each feature of each tree is taken into account, then average per feature to give a vision of the entire model. Highest percentage means most important feature regarding the \code{label} used for the training ;
|
\item \code{Gain} contribution of each feature to the model. For boosted tree model, each gain of each feature of each tree is taken into account, then averaged per feature to give a vision of the entire model. The highest percentage indicates the most important feature for predicting the \code{label} used for the training ;
|
||||||
\item \code{Cover} metric of the number of observations related to this feature (only available for tree models) ;
|
\item \code{Cover} metric of the number of observations related to this feature (only available for tree models) ;
|
||||||
\item \code{Weight} percentage representing the relative number of times a feature has been taken into trees. \code{Gain} should be preferred to search the most important feature. For boosted linear model, this column has no meaning.
|
\item \code{Weight} percentage representing the relative number of times a feature has been taken into trees. \code{Gain} should be preferred to search the most important feature. For boosted linear model, this column has no meaning.
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user