Rename "Frequence" to "Frequency"

This commit is contained in:
terrytangyuan 2015-11-20 20:25:29 -06:00
parent 77fab79d83
commit 51ee382517
4 changed files with 5 additions and 5 deletions

View File

@ -119,7 +119,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
treeDump <- function(feature_names, text, keepDetail){ treeDump <- function(feature_names, text, keepDetail){
if(keepDetail) groupBy <- c("Feature", "Split", "MissingNo") else groupBy <- "Feature" if(keepDetail) groupBy <- c("Feature", "Split", "MissingNo") else groupBy <- "Feature"
result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequence = Frequence / sum(Frequence))][order(Gain, decreasing = T)] result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequency = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequency = Frequency / sum(Frequency))][order(Gain, decreasing = T)]
result result
} }

View File

@ -166,4 +166,4 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
# Avoid error messages during CRAN check. # Avoid error messages during CRAN check.
# The reason is that these variables are never declared # The reason is that these variables are never declared
# They are mainly column names inferred by data.table... # They are mainly column names inferred by data.table...
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence")) globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequency"))

View File

@ -25,7 +25,7 @@ test_that("xgb.importance works", {
expect_true(xgb.dump(bst, 'xgb.model.dump', with.stats = T)) expect_true(xgb.dump(bst, 'xgb.model.dump', with.stats = T))
importance <- xgb.importance(sparse_matrix@Dimnames[[2]], 'xgb.model.dump') importance <- xgb.importance(sparse_matrix@Dimnames[[2]], 'xgb.model.dump')
expect_equal(dim(importance), c(7, 4)) expect_equal(dim(importance), c(7, 4))
expect_equal(colnames(importance), c("Feature", "Gain", "Cover", "Frequence")) expect_equal(colnames(importance), c("Feature", "Gain", "Cover", "Frequency"))
}) })
test_that("xgb.plot.tree works", { test_that("xgb.plot.tree works", {

View File

@ -202,7 +202,7 @@ head(importance)
`Cover` measures the relative quantity of observations affected by a feature. `Cover` measures the relative quantity of observations affected by a feature.
`Frequence` is a simpler way to measure the `Gain`. It just counts the number of times a feature is used in all generated trees. You should not use it (unless you know why you want to use it). `Frequency` is a simpler way to measure the `Gain`. It just counts the number of times a feature is used in all generated trees. You should not use it (unless you know why you want to use it).
### Improvement in the interpretability of feature importance data.table ### Improvement in the interpretability of feature importance data.table
@ -216,7 +216,7 @@ For that purpose we will execute the same function as above but using two more p
importanceRaw <- xgb.importance(sparse_matrix@Dimnames[[2]], model = bst, data = sparse_matrix, label = output_vector) importanceRaw <- xgb.importance(sparse_matrix@Dimnames[[2]], model = bst, data = sparse_matrix, label = output_vector)
# Cleaning for better display # Cleaning for better display
importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequence=NULL)] importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequency=NULL)]
head(importanceClean) head(importanceClean)
``` ```