Frequence to Frequency
This commit is contained in:
parent
77fab79d83
commit
51ee382517
@ -119,7 +119,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
|
|||||||
treeDump <- function(feature_names, text, keepDetail){
|
treeDump <- function(feature_names, text, keepDetail){
|
||||||
if(keepDetail) groupBy <- c("Feature", "Split", "MissingNo") else groupBy <- "Feature"
|
if(keepDetail) groupBy <- c("Feature", "Split", "MissingNo") else groupBy <- "Feature"
|
||||||
|
|
||||||
result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequence = Frequence / sum(Frequence))][order(Gain, decreasing = T)]
|
result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequency = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequency = Frequency / sum(Frequency))][order(Gain, decreasing = T)]
|
||||||
|
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|||||||
@ -166,4 +166,4 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
|
|||||||
# Avoid error messages during CRAN check.
|
# Avoid error messages during CRAN check.
|
||||||
# The reason is that these variables are never declared
|
# The reason is that these variables are never declared
|
||||||
# They are mainly column names inferred by Data.table...
|
# They are mainly column names inferred by Data.table...
|
||||||
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence"))
|
globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequency"))
|
||||||
@ -25,7 +25,7 @@ test_that("xgb.importance works", {
|
|||||||
expect_true(xgb.dump(bst, 'xgb.model.dump', with.stats = T))
|
expect_true(xgb.dump(bst, 'xgb.model.dump', with.stats = T))
|
||||||
importance <- xgb.importance(sparse_matrix@Dimnames[[2]], 'xgb.model.dump')
|
importance <- xgb.importance(sparse_matrix@Dimnames[[2]], 'xgb.model.dump')
|
||||||
expect_equal(dim(importance), c(7, 4))
|
expect_equal(dim(importance), c(7, 4))
|
||||||
expect_equal(colnames(importance), c("Feature", "Gain", "Cover", "Frequence"))
|
expect_equal(colnames(importance), c("Feature", "Gain", "Cover", "Frequency"))
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("xgb.plot.tree works", {
|
test_that("xgb.plot.tree works", {
|
||||||
|
|||||||
@ -202,7 +202,7 @@ head(importance)
|
|||||||
|
|
||||||
`Cover` measures the relative quantity of observations concerned by a feature.
|
`Cover` measures the relative quantity of observations concerned by a feature.
|
||||||
|
|
||||||
`Frequence` is a simpler way to measure the `Gain`. It just counts the number of times a feature is used in all generated trees. You should not use it (unless you know why you want to use it).
|
`Frequency` is a simpler way to measure the `Gain`. It just counts the number of times a feature is used in all generated trees. You should not use it (unless you know why you want to use it).
|
||||||
|
|
||||||
### Improvement in the interpretability of feature importance data.table
|
### Improvement in the interpretability of feature importance data.table
|
||||||
|
|
||||||
@ -216,7 +216,7 @@ For that purpose we will execute the same function as above but using two more p
|
|||||||
importanceRaw <- xgb.importance(sparse_matrix@Dimnames[[2]], model = bst, data = sparse_matrix, label = output_vector)
|
importanceRaw <- xgb.importance(sparse_matrix@Dimnames[[2]], model = bst, data = sparse_matrix, label = output_vector)
|
||||||
|
|
||||||
# Cleaning for better display
|
# Cleaning for better display
|
||||||
importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequence=NULL)]
|
importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequency=NULL)]
|
||||||
|
|
||||||
head(importanceClean)
|
head(importanceClean)
|
||||||
```
|
```
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user