From 51ee38251729d0b19d17cdc4779d927b45003f9e Mon Sep 17 00:00:00 2001 From: terrytangyuan Date: Fri, 20 Nov 2015 20:25:29 -0600 Subject: [PATCH] Frequence to Frequency --- R-package/R/xgb.importance.R | 2 +- R-package/R/xgb.model.dt.tree.R | 2 +- R-package/tests/testthat/test_helpers.R | 2 +- R-package/vignettes/discoverYourData.Rmd | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R index 478438a79..029c3725b 100644 --- a/R-package/R/xgb.importance.R +++ b/R-package/R/xgb.importance.R @@ -119,7 +119,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N treeDump <- function(feature_names, text, keepDetail){ if(keepDetail) groupBy <- c("Feature", "Split", "MissingNo") else groupBy <- "Feature" - result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequence = Frequence / sum(Frequence))][order(Gain, decreasing = T)] + result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequency = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequency = Frequency / sum(Frequency))][order(Gain, decreasing = T)] result } diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R index 0e42ebd75..5833389e2 100644 --- a/R-package/R/xgb.model.dt.tree.R +++ b/R-package/R/xgb.model.dt.tree.R @@ -166,4 +166,4 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model # Avoid error messages during CRAN check. # The reason is that these variables are never declared # They are mainly column names inferred by Data.table... -globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequence")) \ No newline at end of file +globalVariables(c("ID", "Tree", "Yes", ".", ".N", "Feature", "Cover", "Quality", "No", "Gain", "Frequency")) \ No newline at end of file diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index d8f69ae72..c51fef1bd 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -25,7 +25,7 @@ test_that("xgb.importance works", { expect_true(xgb.dump(bst, 'xgb.model.dump', with.stats = T)) importance <- xgb.importance(sparse_matrix@Dimnames[[2]], 'xgb.model.dump') expect_equal(dim(importance), c(7, 4)) - expect_equal(colnames(importance), c("Feature", "Gain", "Cover", "Frequence")) + expect_equal(colnames(importance), c("Feature", "Gain", "Cover", "Frequency")) }) test_that("xgb.plot.tree works", { diff --git a/R-package/vignettes/discoverYourData.Rmd b/R-package/vignettes/discoverYourData.Rmd index fa780ee94..22d996b08 100644 --- a/R-package/vignettes/discoverYourData.Rmd +++ b/R-package/vignettes/discoverYourData.Rmd @@ -202,7 +202,7 @@ head(importance) `Cover` measures the relative quantity of observations concerned by a feature. -`Frequence` is a simpler way to measure the `Gain`. It just counts the number of times a feature is used in all generated trees. You should not use it (unless you know why you want to use it). +`Frequency` is a simpler way to measure the `Gain`. It just counts the number of times a feature is used in all generated trees. You should not use it (unless you know why you want to use it). ### Improvement in the interpretability of feature importance data.table @@ -216,7 +216,7 @@ For that purpose we will execute the same function as above but using two more p importanceRaw <- xgb.importance(sparse_matrix@Dimnames[[2]], model = bst, data = sparse_matrix, label = output_vector) # Cleaning for better display -importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequence=NULL)] +importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequency=NULL)] head(importanceClean) ```