Merge pull request #662 from pommedeterresautee/master

Improve feature importance on GLM model
This commit is contained in:
Michaël Benesty 2015-12-01 19:02:18 +01:00
commit c870ef49da
2 changed files with 13 additions and 2 deletions

View File

@@ -82,7 +82,9 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe
} }
linearDump <- function(feature_names, text){ linearDump <- function(feature_names, text){
which(text == "weight:") %>% {a =. + 1; text[a:length(text)]} %>% as.numeric %>% data.table(Feature = feature_names, Weight = .) weights <- which(text == "weight:") %>% {a =. + 1; text[a:length(text)]} %>% as.numeric
if(is.null(feature_names)) feature_names <- seq(to = length(weights))
data.table(Feature = feature_names, Weight = weights)
} }
model.text.dump <- xgb.dump(model = model, with.stats = T) model.text.dump <- xgb.dump(model = model, with.stats = T)

View File

@@ -15,7 +15,7 @@ df[,ID := NULL]
sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df) sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
output_vector <- df[,Y := 0][Improved == "Marked",Y := 1][,Y] output_vector <- df[,Y := 0][Improved == "Marked",Y := 1][,Y]
bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9, bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9,
eta = 1, nthread = 2, nround = 10,objective = "binary:logistic") eta = 1, nthread = 2, nround = 10, objective = "binary:logistic")
feature.names <- agaricus.train$data@Dimnames[[2]] feature.names <- agaricus.train$data@Dimnames[[2]]
@@ -40,6 +40,15 @@ test_that("xgb.importance works with and without feature names", {
xgb.importance(model = bst) xgb.importance(model = bst)
}) })
test_that("xgb.importance works with GLM model", {
bst.GLM <- xgboost(data = sparse_matrix, label = output_vector,
eta = 1, nthread = 2, nround = 10, objective = "binary:logistic", booster = "gblinear")
importance.GLM <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst.GLM)
expect_equal(dim(importance.GLM), c(10, 2))
expect_equal(colnames(importance.GLM), c("Feature", "Weight"))
xgb.importance(model = bst.GLM)
})
test_that("xgb.plot.tree works with and without feature names", { test_that("xgb.plot.tree works with and without feature names", {
xgb.plot.tree(feature_names = feature.names, model = bst) xgb.plot.tree(feature_names = feature.names, model = bst)
xgb.plot.tree(model = bst) xgb.plot.tree(model = bst)