Merge pull request #124 from pommedeterresautee/master
Add a new function to see importance of features in a model
This commit is contained in:
commit
0c7e090c19
1
.gitignore
vendored
1
.gitignore
vendored
@ -44,3 +44,4 @@ Debug
|
||||
*dump
|
||||
*save
|
||||
*csv
|
||||
.Rproj.user
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
Package: xgboost
|
||||
Type: Package
|
||||
Title: eXtreme Gradient Boosting
|
||||
Version: 0.3-2
|
||||
Date: 2014-08-23
|
||||
Version: 0.3-3
|
||||
Date: 2014-12-28
|
||||
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
|
||||
Maintainer: Tong He <hetong007@gmail.com>
|
||||
Description: This package is an R wrapper of xgboost, which is short for eXtreme
|
||||
@ -21,4 +21,6 @@ Depends:
|
||||
R (>= 2.10)
|
||||
Imports:
|
||||
Matrix (>= 1.1-0),
|
||||
methods
|
||||
methods,
|
||||
data.table (>= 1.9),
|
||||
magrittr (>= 1.5)
|
||||
@ -1,4 +1,4 @@
|
||||
# Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
# Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
|
||||
export(getinfo)
|
||||
export(setinfo)
|
||||
@ -7,6 +7,7 @@ export(xgb.DMatrix)
|
||||
export(xgb.DMatrix.save)
|
||||
export(xgb.cv)
|
||||
export(xgb.dump)
|
||||
export(xgb.importance)
|
||||
export(xgb.load)
|
||||
export(xgb.save)
|
||||
export(xgb.train)
|
||||
@ -15,3 +16,6 @@ exportMethods(predict)
|
||||
import(methods)
|
||||
importClassesFrom(Matrix,dgCMatrix)
|
||||
importClassesFrom(Matrix,dgeMatrix)
|
||||
importFrom(data.table,":=")
|
||||
importFrom(data.table,data.table)
|
||||
importFrom(magrittr,"%>%")
|
||||
|
||||
56
R-package/R/xgb.importance.R
Normal file
56
R-package/R/xgb.importance.R
Normal file
@ -0,0 +1,56 @@
|
||||
#' Show importance of features in a model
#'
#' Read an xgboost model dumped in text file format.
#' The dump can come from a tree or a linear model (text dumps of linear
#' models are only supported in the dev version of xgboost for now).
#'
#' Return a data.table of the features with their weight.
#'
#' @importFrom data.table data.table
#' @importFrom magrittr %>%
#' @importFrom data.table :=
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix.
#' @param filename_dump the name of the text file.
#'
#' @return A \code{data.table} with the columns \code{Feature} and
#'   \code{Weight}. An error is raised when the dump file is empty.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' #Both datasets are lists with two items, a sparse matrix and labels (outcome column which will be learned).
#' #Each column of the sparse Matrix is a feature in one hot encoding format.
#' train <- agaricus.train
#' test <- agaricus.test
#'
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#'                eta = 1, nround = 2,objective = "binary:logistic")
#' xgb.dump(bst, 'xgb.model.dump')
#'
#' #agaricus.test$data@@Dimnames[[2]] represents the column name of the sparse matrix.
#' xgb.importance(agaricus.test$data@@Dimnames[[2]], 'xgb.model.dump')
#'
#' @export
xgb.importance <- function(feature_names, filename_dump) {
  text <- readLines(filename_dump)
  if (length(text) == 0) {
    stop("the dump file '", filename_dump, "' is empty", call. = FALSE)
  }
  # A dump whose second line is "bias:" is treated as a linear model; anything
  # else is parsed as a tree model. The length check keeps a one-line file
  # from evaluating `if (NA)`, which would abort with an unrelated message.
  if (length(text) >= 2 && text[2] == "bias:") {
    result <- linearDump(feature_names, text)
  } else {
    result <- treeDump(feature_names, text)
  }
  result
}
|
||||
|
||||
# Compute feature weights from the text dump of a tree model.
#
# Every line containing a "[f<index><<threshold>]" condition counts as one
# split on the feature with that index. The index found in the dump is shifted
# by one before it is used to look up `feature_names`.
#
# feature_names: vector of feature names, indexed by (dump index + 1).
# text: lines of the dump file.
#
# Returns a data.table with the columns Feature and Weight, where Weight is the
# share of all detected splits that use the feature, sorted by decreasing
# Weight. The table is empty when no split condition is found.
treeDump <- function(feature_names, text) {
  # regmatches() with regexpr() keeps only the lines that contain a condition,
  # so the whole dump is processed at once instead of growing a vector line by
  # line.
  conditions <- regmatches(text, regexpr("\\[f.*\\]", text))
  conditions <- sub("\\]", "", sub("\\[f", "", conditions))
  # The feature index is whatever precedes the first "<" of the condition.
  index_text <- vapply(
    strsplit(conditions, "<", fixed = TRUE),
    function(parts) parts[1],
    character(1)
  )
  features <- feature_names[as.numeric(index_text) + 1]

  # Without any split there is nothing to group by; return an empty table with
  # the usual columns rather than failing on the missing Feature column.
  if (length(features) == 0) {
    return(data.table(Feature = character(0), Weight = numeric(0)))
  }

  # 1. count splits per feature, 2. turn counts into shares,
  # 3. reorder - bigger on top, 4. remove the temporary count column
  counts <- data.table(Feature = features)[, .N, by = Feature]
  counts[, Weight := N / sum(N)]
  counts[order(-Weight)][, -2, with = FALSE]
}
|
||||
|
||||
# Read feature weights from the text dump of a linear model.
#
# Every line after the first "weight:" header is converted with as.numeric()
# and paired with `feature_names`.
#
# feature_names: vector of feature names, in the order the weights are listed.
# text: lines of the dump file.
#
# Returns a data.table with the columns Feature and Weight. Raises an error
# when the dump has no "weight:" header.
linearDump <- function(feature_names, text) {
  header <- which(text == "weight:")
  if (length(header) == 0) {
    stop("no 'weight:' section found in the linear model dump", call. = FALSE)
  }
  start <- header[1]
  # seq_len() gives an empty index when the header is the last line, whereas
  # `(start + 1):length(text)` would count backwards and pick up the header
  # itself plus an out-of-range element.
  weights <- as.numeric(text[start + seq_len(length(text) - start)])
  data.table(Feature = feature_names, Weight = weights)
}
|
||||
Binary file not shown.
Binary file not shown.
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgboost.R
|
||||
\docType{data}
|
||||
\name{agaricus.test}
|
||||
\alias{agaricus.test}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgboost.R
|
||||
\docType{data}
|
||||
\name{agaricus.train}
|
||||
\alias{agaricus.train}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/getinfo.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
\name{getinfo}
|
||||
\alias{getinfo}
|
||||
@ -12,9 +13,9 @@ getinfo(object, ...)
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.DMatrix"}
|
||||
|
||||
\item{name}{the name of the field to get}
|
||||
|
||||
\item{...}{other parameters}
|
||||
|
||||
\item{name}{the name of the field to get}
|
||||
}
|
||||
\description{
|
||||
Get information of an xgb.DMatrix object
|
||||
|
||||
@ -1,11 +1,12 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/predict.xgb.Booster.R
|
||||
\docType{methods}
|
||||
\name{predict,xgb.Booster-method}
|
||||
\alias{predict,xgb.Booster-method}
|
||||
\title{Predict method for eXtreme Gradient Boosting model}
|
||||
\usage{
|
||||
\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE,
|
||||
ntreelimit = NULL)
|
||||
\S4method{predict}{xgb.Booster}(object, newdata, missing = NULL,
|
||||
outputmargin = FALSE, ntreelimit = NULL)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.Booster"}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/setinfo.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
\name{setinfo}
|
||||
\alias{setinfo}
|
||||
@ -12,11 +13,11 @@ setinfo(object, ...)
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.DMatrix"}
|
||||
|
||||
\item{...}{other parameters}
|
||||
|
||||
\item{name}{the name of the field to get}
|
||||
|
||||
\item{info}{the specific field of information to set}
|
||||
|
||||
\item{...}{other parameters}
|
||||
}
|
||||
\description{
|
||||
Set information of an xgb.DMatrix object
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/slice.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
\name{slice}
|
||||
\alias{slice}
|
||||
@ -13,9 +14,9 @@ slice(object, ...)
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.DMatrix"}
|
||||
|
||||
\item{idxset}{an integer vector of indices of rows needed}
|
||||
|
||||
\item{...}{other parameters}
|
||||
|
||||
\item{idxset}{an integer vector of indices of rows needed}
|
||||
}
|
||||
\description{
|
||||
Get a new DMatrix containing the specified rows of
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.DMatrix.R
|
||||
\name{xgb.DMatrix}
|
||||
\alias{xgb.DMatrix}
|
||||
\title{Construct xgb.DMatrix object}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.DMatrix.save.R
|
||||
\name{xgb.DMatrix.save}
|
||||
\alias{xgb.DMatrix.save}
|
||||
\title{Save xgb.DMatrix object to binary file}
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.cv.R
|
||||
\name{xgb.cv}
|
||||
\alias{xgb.cv}
|
||||
\title{Cross Validation}
|
||||
\usage{
|
||||
xgb.cv(params = list(), data, nrounds, nfold, label = NULL, showsd = TRUE,
|
||||
metrics = list(), obj = NULL, feval = NULL, ...)
|
||||
xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
|
||||
missing = NULL, showsd = TRUE, metrics = list(), obj = NULL,
|
||||
feval = NULL, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{params}{the list of parameters. Commonly used ones are:
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.dump.R
|
||||
\name{xgb.dump}
|
||||
\alias{xgb.dump}
|
||||
\title{Save xgboost model to text file}
|
||||
|
||||
33
R-package/man/xgb.importance.Rd
Normal file
33
R-package/man/xgb.importance.Rd
Normal file
@ -0,0 +1,33 @@
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.importance.R
|
||||
\name{xgb.importance}
|
||||
\alias{xgb.importance}
|
||||
\title{Show importance of features in a model}
|
||||
\usage{
|
||||
xgb.importance(feature_names, filename_dump)
|
||||
}
|
||||
\arguments{
|
||||
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix.}
|
||||
|
||||
\item{filename_dump}{the name of the text file.}
|
||||
}
|
||||
\description{
|
||||
Read an xgboost model in text file format. Return a data.table of the features with their weights.
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
|
||||
#Both datasets are lists with two items, a sparse matrix and labels (outcome column which will be learned).
|
||||
#Each column of the sparse Matrix is a feature in one hot encoding format.
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
||||
eta = 1, nround = 2,objective = "binary:logistic")
|
||||
xgb.dump(bst, 'xgb.model.dump')
|
||||
|
||||
#agaricus.test$data@Dimnames[[2]] represents the column name of the sparse matrix.
|
||||
xgb.importance(agaricus.test$data@Dimnames[[2]], 'xgb.model.dump')
|
||||
}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.load.R
|
||||
\name{xgb.load}
|
||||
\alias{xgb.load}
|
||||
\title{Load xgboost model from binary file}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.save.R
|
||||
\name{xgb.save}
|
||||
\alias{xgb.save}
|
||||
\title{Save xgboost model to binary file}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgb.train.R
|
||||
\name{xgb.train}
|
||||
\alias{xgb.train}
|
||||
\title{eXtreme Gradient Boosting Training}
|
||||
|
||||
@ -1,10 +1,11 @@
|
||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
||||
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||
% Please edit documentation in R/xgboost.R
|
||||
\name{xgboost}
|
||||
\alias{xgboost}
|
||||
\title{eXtreme Gradient Boosting (Tree) library}
|
||||
\usage{
|
||||
xgboost(data = NULL, label = NULL, params = list(), nrounds,
|
||||
verbose = 1, ...)
|
||||
xgboost(data = NULL, label = NULL, missing = NULL, params = list(),
|
||||
nrounds, verbose = 1, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user