From 73713de6016163252958463147c9c6cd509e79b1 Mon Sep 17 00:00:00 2001
From: david-cortes
Date: Sun, 31 Dec 2023 06:01:00 +0100
Subject: [PATCH] [R] rename Quality -> Gain (#9938)

---
 R-package/R/xgb.model.dt.tree.R         | 10 +++++-----
 R-package/R/xgb.plot.deepness.R         |  4 ++--
 R-package/R/xgb.plot.multi.trees.R      |  4 ++--
 R-package/R/xgb.plot.tree.R             |  4 ++--
 R-package/man/xgb.model.dt.tree.Rd      |  2 +-
 R-package/tests/testthat/test_helpers.R |  4 ++--
 R-package/tests/testthat/test_update.R  | 16 ++++++++--------
 7 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R
index 8e74ea4b4..9a32d82a0 100644
--- a/R-package/R/xgb.model.dt.tree.R
+++ b/R-package/R/xgb.model.dt.tree.R
@@ -28,7 +28,7 @@
 #' - `Yes`: ID of the next node when the split condition is met.
 #' - `No`: ID of the next node when the split condition is not met.
 #' - `Missing`: ID of the next node when the branch value is missing.
-#' - `Quality`: either the split gain (change in loss) or the leaf value.
+#' - `Gain`: either the split gain (change in loss) or the leaf value.
 #' - `Cover`: metric related to the number of observations either seen by a split
 #'   or collected by a leaf during training.
 #'
@@ -122,7 +122,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
   # parse branch lines
   branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
                       "gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
-  branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
+  branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
   td[
     isLeaf == FALSE,
     (branch_cols) := {
@@ -132,7 +132,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
       xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
       if (length(xtr) == 0) {
         as.data.table(
-          list(Feature = "NA", Split = "NA", Yes = "NA", No = "NA", Missing = "NA", Quality = "NA", Cover = "NA")
+          list(Feature = "NA", Split = "NA", Yes = "NA", No = "NA", Missing = "NA", Gain = "NA", Cover = "NA")
         )
       } else {
         as.data.table(xtr)
@@ -152,7 +152,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,

   # parse leaf lines
   leaf_rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
-  leaf_cols <- c("Feature", "Quality", "Cover")
+  leaf_cols <- c("Feature", "Gain", "Cover")
   td[
     isLeaf == TRUE,
     (leaf_cols) := {
@@ -167,7 +167,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
   ]

   # convert some columns to numeric
-  numeric_cols <- c("Split", "Quality", "Cover")
+  numeric_cols <- c("Split", "Gain", "Cover")
   td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]
   if (use_int_id) {
     int_cols <- c("Yes", "No", "Missing")
diff --git a/R-package/R/xgb.plot.deepness.R b/R-package/R/xgb.plot.deepness.R
index 092b07d38..8e1972374 100644
--- a/R-package/R/xgb.plot.deepness.R
+++ b/R-package/R/xgb.plot.deepness.R
@@ -92,7 +92,7 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d
     stop("Model tree columns are not as expected!\n",
          "  Note that this function works only for tree models.")

-  dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Quality)], by = "ID")
+  dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Gain)], by = "ID")
   setkeyv(dt_depths, c("Tree", "ID"))
   # count by depth levels, and also calculate average cover at a depth
   dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]
@@ -157,6 +157,6 @@ get.leaf.depth <- function(dt_tree) {
 # They are mainly column names inferred by Data.table...
 globalVariables(
   c(
-    ".N", "N", "Depth", "Quality", "Cover", "Tree", "ID", "Yes", "No", "Feature", "Leaf", "Weight"
+    ".N", "N", "Depth", "Gain", "Cover", "Tree", "ID", "Yes", "No", "Feature", "Leaf", "Weight"
   )
 )
diff --git a/R-package/R/xgb.plot.multi.trees.R b/R-package/R/xgb.plot.multi.trees.R
index 6402cb767..88616cfb7 100644
--- a/R-package/R/xgb.plot.multi.trees.R
+++ b/R-package/R/xgb.plot.multi.trees.R
@@ -95,13 +95,13 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
     data.table::set(tree.matrix, j = nm, value = sub("^\\d+-", "", tree.matrix[[nm]]))
   nodes.dt <- tree.matrix[
-        , .(Quality = sum(Quality))
+        , .(Gain = sum(Gain))
         , by = .(abs.node.position, Feature)
       ][, .(
           Text = paste0(
             paste0(
               Feature[seq_len(min(length(Feature), features_keep))],
               " (",
-              format(Quality[seq_len(min(length(Quality), features_keep))], digits = 5),
+              format(Gain[seq_len(min(length(Gain), features_keep))], digits = 5),
               ")"
             ),
             collapse = "\n"
diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R
index 8b12d8a68..c75a42e84 100644
--- a/R-package/R/xgb.plot.tree.R
+++ b/R-package/R/xgb.plot.tree.R
@@ -132,7 +132,7 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot

   dt <- xgb.model.dt.tree(feature_names = feature_names, model = model, trees = trees)

-  dt[, label := paste0(Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "\nValue: ", "\nGain: "), Quality)]
+  dt[, label := paste0(Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "\nValue: ", "\nGain: "), Gain)]
   if (show_node_id)
     dt[, label := paste0(ID, ": ", label)]
   dt[Node == 0, label := paste0("Tree ", Tree, "\n", label)]
@@ -199,4 +199,4 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
-globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", "Missing", ".", "shape", "filledcolor", "label"))
+globalVariables(c("Feature", "ID", "Cover", "Gain", "Split", "Yes", "No", "Missing", ".", "shape", "filledcolor", "label"))
diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd
index 477c40775..330998ab8 100644
--- a/R-package/man/xgb.model.dt.tree.Rd
+++ b/R-package/man/xgb.model.dt.tree.Rd
@@ -46,7 +46,7 @@ for a leaf node, it simply labels it as \code{"Leaf"}.
 \item \code{Yes}: ID of the next node when the split condition is met.
 \item \code{No}: ID of the next node when the split condition is not met.
 \item \code{Missing}: ID of the next node when the branch value is missing.
-\item \code{Quality}: either the split gain (change in loss) or the leaf value.
+\item \code{Gain}: either the split gain (change in loss) or the leaf value.
 \item \code{Cover}: metric related to the number of observations either seen by a split
 or collected by a leaf during training.
 }
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index de6a099fc..7fae052b4 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -275,7 +275,7 @@ test_that("xgb.Booster serializing as R object works", {

 test_that("xgb.model.dt.tree works with and without feature names", {
   .skip_if_vcd_not_available()
-  names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
+  names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
   dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree)
   expect_equal(names.dt.trees, names(dt.tree))
   if (!flag_32bit)
@@ -341,7 +341,7 @@ test_that("xgb.importance works with and without feature names", {
       trees = trees
     )[
       Feature != "Leaf", .(
-        Gain = sum(Quality),
+        Gain = sum(Gain),
         Cover = sum(Cover),
         Frequency = .N
       ),
diff --git a/R-package/tests/testthat/test_update.R b/R-package/tests/testthat/test_update.R
index cf8b6f007..f37bb0d21 100644
--- a/R-package/tests/testthat/test_update.R
+++ b/R-package/tests/testthat/test_update.R
@@ -53,9 +53,9 @@ test_that("updating the model works", {
   # should be the same evaluation but different gains and larger cover
   expect_equal(bst2$evaluation_log, bst2r$evaluation_log)
   if (!win32_flag) {
-    expect_equal(tr2[Feature == 'Leaf']$Quality, tr2r[Feature == 'Leaf']$Quality)
+    expect_equal(tr2[Feature == 'Leaf']$Gain, tr2r[Feature == 'Leaf']$Gain)
   }
-  expect_gt(sum(abs(tr2[Feature != 'Leaf']$Quality - tr2r[Feature != 'Leaf']$Quality)), 100)
+  expect_gt(sum(abs(tr2[Feature != 'Leaf']$Gain - tr2r[Feature != 'Leaf']$Gain)), 100)
   expect_gt(sum(tr2r$Cover) / sum(tr2$Cover), 1.5)

   # process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
@@ -72,8 +72,8 @@ test_that("updating the model works", {
   tr2u <- xgb.model.dt.tree(model = bst2u)
   # should be the same evaluation but different gains and larger cover
   expect_equal(bst2$evaluation_log, bst2u$evaluation_log)
-  expect_equal(tr2[Feature == 'Leaf']$Quality, tr2u[Feature == 'Leaf']$Quality)
-  expect_gt(sum(abs(tr2[Feature != 'Leaf']$Quality - tr2u[Feature != 'Leaf']$Quality)), 100)
+  expect_equal(tr2[Feature == 'Leaf']$Gain, tr2u[Feature == 'Leaf']$Gain)
+  expect_gt(sum(abs(tr2[Feature != 'Leaf']$Gain - tr2u[Feature != 'Leaf']$Gain)), 100)
   expect_gt(sum(tr2u$Cover) / sum(tr2$Cover), 1.5)
   # the results should be the same as for the model with an extra 'refresh' updater
   expect_equal(bst2r$evaluation_log, bst2u$evaluation_log)
@@ -87,8 +87,8 @@ test_that("updating the model works", {
   tr1ut <- xgb.model.dt.tree(model = bst1ut)
   # should be the same evaluations but different gains and smaller cover (test data is smaller)
   expect_equal(bst1$evaluation_log, bst1ut$evaluation_log)
-  expect_equal(tr1[Feature == 'Leaf']$Quality, tr1ut[Feature == 'Leaf']$Quality)
-  expect_gt(sum(abs(tr1[Feature != 'Leaf']$Quality - tr1ut[Feature != 'Leaf']$Quality)), 100)
+  expect_equal(tr1[Feature == 'Leaf']$Gain, tr1ut[Feature == 'Leaf']$Gain)
+  expect_gt(sum(abs(tr1[Feature != 'Leaf']$Gain - tr1ut[Feature != 'Leaf']$Gain)), 100)
   expect_lt(sum(tr1ut$Cover) / sum(tr1$Cover), 0.5)
 })

@@ -111,7 +111,7 @@ test_that("updating works for multiclass & multitree", {

   # should be the same evaluation but different gains and larger cover
   expect_equal(bst0$evaluation_log, bst0u$evaluation_log)
-  expect_equal(tr0[Feature == 'Leaf']$Quality, tr0u[Feature == 'Leaf']$Quality)
-  expect_gt(sum(abs(tr0[Feature != 'Leaf']$Quality - tr0u[Feature != 'Leaf']$Quality)), 100)
+  expect_equal(tr0[Feature == 'Leaf']$Gain, tr0u[Feature == 'Leaf']$Gain)
+  expect_gt(sum(abs(tr0[Feature != 'Leaf']$Gain - tr0u[Feature != 'Leaf']$Gain)), 100)
   expect_gt(sum(tr0u$Cover) / sum(tr0$Cover), 1.5)
 })
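
Note for downstream users: after this rename, code that consumes the output of xgb.model.dt.tree() must read the `Gain` column instead of `Quality`. Below is a minimal sketch of the updated usage, not part of the patch itself; it assumes the `agaricus.train` example data bundled with the package and the classic xgboost() training call, and it mirrors the per-feature aggregation used in test_helpers.R to compare against xgb.importance().

    library(xgboost)
    library(data.table)

    data(agaricus.train, package = "xgboost")

    # train a small tree model on the bundled example data
    bst <- xgboost(
      data = agaricus.train$data, label = agaricus.train$label,
      max_depth = 2, eta = 1, nrounds = 2, objective = "binary:logistic"
    )

    # per-node table: split gain / leaf value is now in the `Gain` column
    # (previously named `Quality`)
    dt <- xgb.model.dt.tree(model = bst)

    # aggregate gain over split nodes by feature, as in the updated tests
    gain_by_feature <- dt[
      Feature != "Leaf",
      .(Gain = sum(Gain), Cover = sum(Cover), Frequency = .N),
      by = Feature
    ]
    print(gain_by_feature)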