Merge pull request #124 from pommedeterresautee/master
Add a new function to see importance of features in a model
This commit is contained in:
commit
0c7e090c19
1
.gitignore
vendored
1
.gitignore
vendored
@ -44,3 +44,4 @@ Debug
|
|||||||
*dump
|
*dump
|
||||||
*save
|
*save
|
||||||
*csv
|
*csv
|
||||||
|
.Rproj.user
|
||||||
|
|||||||
@ -1,8 +1,8 @@
|
|||||||
Package: xgboost
|
Package: xgboost
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: eXtreme Gradient Boosting
|
Title: eXtreme Gradient Boosting
|
||||||
Version: 0.3-2
|
Version: 0.3-3
|
||||||
Date: 2014-08-23
|
Date: 2014-12-28
|
||||||
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
|
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>
|
||||||
Maintainer: Tong He <hetong007@gmail.com>
|
Maintainer: Tong He <hetong007@gmail.com>
|
||||||
Description: This package is an R wrapper of xgboost, which is short for eXtreme
|
Description: This package is an R wrapper of xgboost, which is short for eXtreme
|
||||||
@ -21,4 +21,6 @@ Depends:
|
|||||||
R (>= 2.10)
|
R (>= 2.10)
|
||||||
Imports:
|
Imports:
|
||||||
Matrix (>= 1.1-0),
|
Matrix (>= 1.1-0),
|
||||||
methods
|
methods,
|
||||||
|
data.table (>= 1.9),
|
||||||
|
magrittr (>= 1.5)
|
||||||
@ -1,4 +1,4 @@
|
|||||||
# Generated by roxygen2 (4.0.1): do not edit by hand
|
# Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
|
||||||
export(getinfo)
|
export(getinfo)
|
||||||
export(setinfo)
|
export(setinfo)
|
||||||
@ -7,6 +7,7 @@ export(xgb.DMatrix)
|
|||||||
export(xgb.DMatrix.save)
|
export(xgb.DMatrix.save)
|
||||||
export(xgb.cv)
|
export(xgb.cv)
|
||||||
export(xgb.dump)
|
export(xgb.dump)
|
||||||
|
export(xgb.importance)
|
||||||
export(xgb.load)
|
export(xgb.load)
|
||||||
export(xgb.save)
|
export(xgb.save)
|
||||||
export(xgb.train)
|
export(xgb.train)
|
||||||
@ -15,3 +16,6 @@ exportMethods(predict)
|
|||||||
import(methods)
|
import(methods)
|
||||||
importClassesFrom(Matrix,dgCMatrix)
|
importClassesFrom(Matrix,dgCMatrix)
|
||||||
importClassesFrom(Matrix,dgeMatrix)
|
importClassesFrom(Matrix,dgeMatrix)
|
||||||
|
importFrom(data.table,":=")
|
||||||
|
importFrom(data.table,data.table)
|
||||||
|
importFrom(magrittr,"%>%")
|
||||||
|
|||||||
56
R-package/R/xgb.importance.R
Normal file
56
R-package/R/xgb.importance.R
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
#' Show importance of features in a model
#'
#' Read an xgboost model in text file format.
#' Can be tree or linear model (text dumps of linear models are only supported
#' in the dev version of Xgboost for now).
#'
#' @importFrom data.table data.table
#' @importFrom magrittr %>%
#' @importFrom data.table :=
#' @param feature_names names of each feature as a character vector. Can be extracted from a sparse matrix.
#' @param filename_dump the name of the text file.
#'
#' @return A \code{data.table} of the features with their weight.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#'
#' #Both datasets are lists with two items: a sparse matrix and labels
#' #(the outcome column which will be learned).
#' #Each column of the sparse Matrix is a feature in one hot encoding format.
#' train <- agaricus.train
#' test <- agaricus.test
#'
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#'                eta = 1, nround = 2,objective = "binary:logistic")
#' xgb.dump(bst, 'xgb.model.dump')
#'
#' #agaricus.test$data@@Dimnames[[2]] represents the column name of the sparse matrix.
#' xgb.importance(agaricus.test$data@@Dimnames[[2]], 'xgb.model.dump')
#'
#' @export
xgb.importance <- function(feature_names, filename_dump) {
  text <- readLines(filename_dump)

  # A linear model dump is recognised by "bias:" on its second line.
  # isTRUE() keeps the condition a strict TRUE/FALSE: when the file has fewer
  # than two lines text[2] is NA, and a bare comparison would make if() fail
  # with "missing value where TRUE/FALSE needed".
  if (isTRUE(text[2] == "bias:")) {
    linearDump(feature_names, text)
  } else {
    treeDump(feature_names, text)
  }
}
|
||||||
|
|
||||||
|
# Compute feature weights from the text dump of a tree model.
#
# Split nodes of the dump carry a condition such as "[f28<-9.53674e-07]".
# The number after "f" is used as an index into feature_names after adding 1
# (the dump index is treated as zero-based). The weight of a feature is the
# share of all split nodes that use it.
#
# feature_names: character vector of feature names.
# text: lines of the model dump.
#
# Returns a data.table with columns Feature and Weight, largest weight first.
# A dump without any split node gives an empty table.
treeDump <- function(feature_names, text) {
  # First split condition of every line that has one; leaf lines have none
  # and are dropped by regmatches().
  conditions <- regmatches(text, regexpr("\\[f[0-9]+<", text))
  if (length(conditions) == 0L) {
    return(data.table(Feature = character(0), Weight = numeric(0)))
  }

  # Keep only the digits of "[f<index><" and shift to R's one-based indexing.
  split_index <- as.integer(gsub("[^0-9]", "", conditions)) + 1L
  split_features <- feature_names[split_index]

  # Count the splits per feature, keeping features in order of first
  # appearance so that ties below stay in that order.
  features <- unique(split_features)
  counts <- tabulate(match(split_features, features), nbins = length(features))
  weights <- counts / sum(counts)

  # order() leaves ties in their original order, so equally weighted features
  # keep their order of first appearance in the dump.
  ranking <- order(-weights)
  data.table(Feature = features[ranking], Weight = weights[ranking])
}
|
||||||
|
|
||||||
|
# Build the feature weight table from the text dump of a linear model.
#
# Every line that follows the "weight:" header is read as one numeric
# coefficient and paired with feature_names.
#
# feature_names: character vector of feature names.
# text: lines of the model dump.
#
# Returns a data.table with columns Feature and Weight.
# Stops with an error when the dump has no "weight:" header or when nothing
# follows it.
linearDump <- function(feature_names, text) {
  header <- which(text == "weight:")
  if (length(header) == 0L) {
    stop("no \"weight:\" section found in the model dump", call. = FALSE)
  }

  # Drop everything up to and including the first header. Negative indexing
  # gives an empty vector when the header is the last line, whereas a
  # `from:length(text)` range would silently count backwards.
  weights <- as.numeric(text[-seq_len(header[1L])])
  if (length(weights) == 0L) {
    stop("the \"weight:\" section of the model dump is empty", call. = FALSE)
  }

  data.table(Feature = feature_names, Weight = weights)
}
|
||||||
Binary file not shown.
Binary file not shown.
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgboost.R
|
||||||
\docType{data}
|
\docType{data}
|
||||||
\name{agaricus.test}
|
\name{agaricus.test}
|
||||||
\alias{agaricus.test}
|
\alias{agaricus.test}
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgboost.R
|
||||||
\docType{data}
|
\docType{data}
|
||||||
\name{agaricus.train}
|
\name{agaricus.train}
|
||||||
\alias{agaricus.train}
|
\alias{agaricus.train}
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/getinfo.xgb.DMatrix.R
|
||||||
\docType{methods}
|
\docType{methods}
|
||||||
\name{getinfo}
|
\name{getinfo}
|
||||||
\alias{getinfo}
|
\alias{getinfo}
|
||||||
@ -12,9 +13,9 @@ getinfo(object, ...)
|
|||||||
\arguments{
|
\arguments{
|
||||||
\item{object}{Object of class "xgb.DMatrix"}
|
\item{object}{Object of class "xgb.DMatrix"}
|
||||||
|
|
||||||
\item{name}{the name of the field to get}
|
|
||||||
|
|
||||||
\item{...}{other parameters}
|
\item{...}{other parameters}
|
||||||
|
|
||||||
|
\item{name}{the name of the field to get}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Get information of an xgb.DMatrix object
|
Get information of an xgb.DMatrix object
|
||||||
|
|||||||
@ -1,11 +1,12 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/predict.xgb.Booster.R
|
||||||
\docType{methods}
|
\docType{methods}
|
||||||
\name{predict,xgb.Booster-method}
|
\name{predict,xgb.Booster-method}
|
||||||
\alias{predict,xgb.Booster-method}
|
\alias{predict,xgb.Booster-method}
|
||||||
\title{Predict method for eXtreme Gradient Boosting model}
|
\title{Predict method for eXtreme Gradient Boosting model}
|
||||||
\usage{
|
\usage{
|
||||||
\S4method{predict}{xgb.Booster}(object, newdata, outputmargin = FALSE,
|
\S4method{predict}{xgb.Booster}(object, newdata, missing = NULL,
|
||||||
ntreelimit = NULL)
|
outputmargin = FALSE, ntreelimit = NULL)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{object}{Object of class "xgb.Booster"}
|
\item{object}{Object of class "xgb.Booster"}
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/setinfo.xgb.DMatrix.R
|
||||||
\docType{methods}
|
\docType{methods}
|
||||||
\name{setinfo}
|
\name{setinfo}
|
||||||
\alias{setinfo}
|
\alias{setinfo}
|
||||||
@ -12,11 +13,11 @@ setinfo(object, ...)
|
|||||||
\arguments{
|
\arguments{
|
||||||
\item{object}{Object of class "xgb.DMatrix"}
|
\item{object}{Object of class "xgb.DMatrix"}
|
||||||
|
|
||||||
|
\item{...}{other parameters}
|
||||||
|
|
||||||
\item{name}{the name of the field to get}
|
\item{name}{the name of the field to get}
|
||||||
|
|
||||||
\item{info}{the specific field of information to set}
|
\item{info}{the specific field of information to set}
|
||||||
|
|
||||||
\item{...}{other parameters}
|
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Set information of an xgb.DMatrix object
|
Set information of an xgb.DMatrix object
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/slice.xgb.DMatrix.R
|
||||||
\docType{methods}
|
\docType{methods}
|
||||||
\name{slice}
|
\name{slice}
|
||||||
\alias{slice}
|
\alias{slice}
|
||||||
@ -13,9 +14,9 @@ slice(object, ...)
|
|||||||
\arguments{
|
\arguments{
|
||||||
\item{object}{Object of class "xgb.DMatrix"}
|
\item{object}{Object of class "xgb.DMatrix"}
|
||||||
|
|
||||||
\item{idxset}{a integer vector of indices of rows needed}
|
|
||||||
|
|
||||||
\item{...}{other parameters}
|
\item{...}{other parameters}
|
||||||
|
|
||||||
|
\item{idxset}{a integer vector of indices of rows needed}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Get a new DMatrix containing the specified rows of
|
Get a new DMatrix containing the specified rows of
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgb.DMatrix.R
|
||||||
\name{xgb.DMatrix}
|
\name{xgb.DMatrix}
|
||||||
\alias{xgb.DMatrix}
|
\alias{xgb.DMatrix}
|
||||||
\title{Construct xgb.DMatrix object}
|
\title{Construct xgb.DMatrix object}
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgb.DMatrix.save.R
|
||||||
\name{xgb.DMatrix.save}
|
\name{xgb.DMatrix.save}
|
||||||
\alias{xgb.DMatrix.save}
|
\alias{xgb.DMatrix.save}
|
||||||
\title{Save xgb.DMatrix object to binary file}
|
\title{Save xgb.DMatrix object to binary file}
|
||||||
|
|||||||
@ -1,10 +1,12 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgb.cv.R
|
||||||
\name{xgb.cv}
|
\name{xgb.cv}
|
||||||
\alias{xgb.cv}
|
\alias{xgb.cv}
|
||||||
\title{Cross Validation}
|
\title{Cross Validation}
|
||||||
\usage{
|
\usage{
|
||||||
xgb.cv(params = list(), data, nrounds, nfold, label = NULL, showsd = TRUE,
|
xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
|
||||||
metrics = list(), obj = NULL, feval = NULL, ...)
|
missing = NULL, showsd = TRUE, metrics = list(), obj = NULL,
|
||||||
|
feval = NULL, ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{params}{the list of parameters. Commonly used ones are:
|
\item{params}{the list of parameters. Commonly used ones are:
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgb.dump.R
|
||||||
\name{xgb.dump}
|
\name{xgb.dump}
|
||||||
\alias{xgb.dump}
|
\alias{xgb.dump}
|
||||||
\title{Save xgboost model to text file}
|
\title{Save xgboost model to text file}
|
||||||
|
|||||||
33
R-package/man/xgb.importance.Rd
Normal file
33
R-package/man/xgb.importance.Rd
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgb.importance.R
|
||||||
|
\name{xgb.importance}
|
||||||
|
\alias{xgb.importance}
|
||||||
|
\title{Show importance of features in a model}
|
||||||
|
\usage{
|
||||||
|
xgb.importance(feature_names, filename_dump)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{feature_names}{names of each feature as a character vector. Can be extracted from a sparse matrix.}
|
||||||
|
|
||||||
|
\item{filename_dump}{the name of the text file.}
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Read an xgboost model in text file format. Return a data.table of the features with their weight.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
data(agaricus.train, package='xgboost')
|
||||||
|
data(agaricus.test, package='xgboost')
|
||||||
|
|
||||||
|
#Both datasets are lists with two items: a sparse matrix and labels (the outcome column which will be learned).
|
||||||
|
#Each column of the sparse Matrix is a feature in one hot encoding format.
|
||||||
|
train <- agaricus.train
|
||||||
|
test <- agaricus.test
|
||||||
|
|
||||||
|
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
||||||
|
eta = 1, nround = 2,objective = "binary:logistic")
|
||||||
|
xgb.dump(bst, 'xgb.model.dump')
|
||||||
|
|
||||||
|
#agaricus.test$data@Dimnames[[2]] represents the column name of the sparse matrix.
|
||||||
|
xgb.importance(agaricus.test$data@Dimnames[[2]], 'xgb.model.dump')
|
||||||
|
}
|
||||||
|
|
||||||
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgb.load.R
|
||||||
\name{xgb.load}
|
\name{xgb.load}
|
||||||
\alias{xgb.load}
|
\alias{xgb.load}
|
||||||
\title{Load xgboost model from binary file}
|
\title{Load xgboost model from binary file}
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgb.save.R
|
||||||
\name{xgb.save}
|
\name{xgb.save}
|
||||||
\alias{xgb.save}
|
\alias{xgb.save}
|
||||||
\title{Save xgboost model to binary file}
|
\title{Save xgboost model to binary file}
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgb.train.R
|
||||||
\name{xgb.train}
|
\name{xgb.train}
|
||||||
\alias{xgb.train}
|
\alias{xgb.train}
|
||||||
\title{eXtreme Gradient Boosting Training}
|
\title{eXtreme Gradient Boosting Training}
|
||||||
|
|||||||
@ -1,10 +1,11 @@
|
|||||||
% Generated by roxygen2 (4.0.1): do not edit by hand
|
% Generated by roxygen2 (4.1.0): do not edit by hand
|
||||||
|
% Please edit documentation in R/xgboost.R
|
||||||
\name{xgboost}
|
\name{xgboost}
|
||||||
\alias{xgboost}
|
\alias{xgboost}
|
||||||
\title{eXtreme Gradient Boosting (Tree) library}
|
\title{eXtreme Gradient Boosting (Tree) library}
|
||||||
\usage{
|
\usage{
|
||||||
xgboost(data = NULL, label = NULL, params = list(), nrounds,
|
xgboost(data = NULL, label = NULL, missing = NULL, params = list(),
|
||||||
verbose = 1, ...)
|
nrounds, verbose = 1, ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
|
\item{data}{takes \code{matrix}, \code{dgCMatrix}, local data file or
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user