Frequence to Frequency
This commit is contained in:
parent
77fab79d83
commit
51ee382517
@ -119,7 +119,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
|
||||
treeDump <- function(feature_names, text, keepDetail){
|
||||
if(keepDetail) groupBy <- c("Feature", "Split", "MissingNo") else groupBy <- "Feature"
|
||||
|
||||
result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequence = Frequence / sum(Frequence))][order(Gain, decreasing = T)]
|
||||
result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequency = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequency = Frequency / sum(Frequency))][order(Gain, decreasing = T)]
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
@ -166,4 +166,4 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
|
||||
# Avoid error messages during CRAN check.
|
||||
# The reason is that these variables are never declared
|
||||
# They are mainly column names that data.table resolves inside `[...]` at run time.
|
||||
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence"))
|
||||
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequency"))
|
||||
@ -25,7 +25,7 @@ test_that("xgb.importance works", {
|
||||
expect_true(xgb.dump(bst, 'xgb.model.dump', with.stats = T))
|
||||
importance <- xgb.importance(sparse_matrix@Dimnames[[2]], 'xgb.model.dump')
|
||||
expect_equal(dim(importance), c(7, 4))
|
||||
expect_equal(colnames(importance), c("Feature", "Gain", "Cover", "Frequence"))
|
||||
expect_equal(colnames(importance), c("Feature", "Gain", "Cover", "Frequency"))
|
||||
})
|
||||
|
||||
test_that("xgb.plot.tree works", {
|
||||
|
||||
@ -202,7 +202,7 @@ head(importance)
|
||||
|
||||
`Cover` measures the relative number of observations affected by a feature.
|
||||
|
||||
`Frequence` is a simpler way to measure the `Gain`. It just counts the number of times a feature is used in all generated trees. You should not use it (unless you know why you want to use it).
|
||||
`Frequency` is a simpler alternative to `Gain`. It just counts the number of times a feature is used across all generated trees, expressed as a share of all feature uses. You should not use it (unless you know why you want to use it).
|
||||
|
||||
### Improvement in the interpretability of feature importance data.table
|
||||
|
||||
@ -216,7 +216,7 @@ For that purpose we will execute the same function as above but using two more p
|
||||
importanceRaw <- xgb.importance(sparse_matrix@Dimnames[[2]], model = bst, data = sparse_matrix, label = output_vector)
|
||||
|
||||
# Cleaning for better display
|
||||
importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequence=NULL)]
|
||||
importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequency=NULL)]
|
||||
|
||||
head(importanceClean)
|
||||
```
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user