diff --git a/R-package/R/xgb.plot.importance.R b/R-package/R/xgb.plot.importance.R index 2c02d5a42..07220375d 100644 --- a/R-package/R/xgb.plot.importance.R +++ b/R-package/R/xgb.plot.importance.R @@ -87,7 +87,13 @@ xgb.plot.importance <- function(importance_matrix = NULL, top_n = NULL, measure } # also aggregate, just in case when the values were not yet summed up by feature - importance_matrix <- importance_matrix[, Importance := sum(get(measure)), by = Feature] + importance_matrix <- importance_matrix[ + , lapply(.SD, sum) + , .SDcols = setdiff(names(importance_matrix), "Feature") + , by = Feature + ][ + , Importance := get(measure) + ] # make sure it's ordered importance_matrix <- importance_matrix[order(-abs(Importance))] diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 04e034ce1..de6a099fc 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -382,6 +382,9 @@ test_that("xgb.importance works with GLM model", { expect_equal(colnames(imp2plot), c("Feature", "Weight", "Importance")) xgb.ggplot.importance(importance.GLM) + # check that the input is not modified in-place + expect_false("Importance" %in% names(importance.GLM)) + # for multiclass imp.GLM <- xgb.importance(model = mbst.GLM) expect_equal(dim(imp.GLM), c(12, 3)) @@ -400,6 +403,16 @@ test_that("xgb.model.dt.tree and xgb.importance work with a single split model", expect_equal(imp$Gain, 1) }) +test_that("xgb.plot.importance de-duplicates features", { + importances <- data.table( + Feature = c("col1", "col2", "col2"), + Gain = c(0.4, 0.3, 0.3) + ) + imp2plot <- xgb.plot.importance(importances) + expect_equal(nrow(imp2plot), 2L) + expect_equal(imp2plot$Feature, c("col2", "col1")) +}) + test_that("xgb.plot.tree works with and without feature names", { .skip_if_vcd_not_available() expect_silent(xgb.plot.tree(feature_names = feature.names, model = bst.Tree))