[R] rename Quality -> Gain (#9938)
This commit is contained in:
parent
8b9c98b65b
commit
73713de601
@ -28,7 +28,7 @@
|
|||||||
#' - `Yes`: ID of the next node when the split condition is met.
|
#' - `Yes`: ID of the next node when the split condition is met.
|
||||||
#' - `No`: ID of the next node when the split condition is not met.
|
#' - `No`: ID of the next node when the split condition is not met.
|
||||||
#' - `Missing`: ID of the next node when the branch value is missing.
|
#' - `Missing`: ID of the next node when the branch value is missing.
|
||||||
#' - `Quality`: either the split gain (change in loss) or the leaf value.
|
#' - `Gain`: either the split gain (change in loss) or the leaf value.
|
||||||
#' - `Cover`: metric related to the number of observations either seen by a split
|
#' - `Cover`: metric related to the number of observations either seen by a split
|
||||||
#' or collected by a leaf during training.
|
#' or collected by a leaf during training.
|
||||||
#'
|
#'
|
||||||
@ -122,7 +122,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
|||||||
# parse branch lines
|
# parse branch lines
|
||||||
branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
|
branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
|
||||||
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
||||||
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
|
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
|
||||||
td[
|
td[
|
||||||
isLeaf == FALSE,
|
isLeaf == FALSE,
|
||||||
(branch_cols) := {
|
(branch_cols) := {
|
||||||
@ -132,7 +132,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
|||||||
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
|
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
|
||||||
if (length(xtr) == 0) {
|
if (length(xtr) == 0) {
|
||||||
as.data.table(
|
as.data.table(
|
||||||
list(Feature = "NA", Split = "NA", Yes = "NA", No = "NA", Missing = "NA", Quality = "NA", Cover = "NA")
|
list(Feature = "NA", Split = "NA", Yes = "NA", No = "NA", Missing = "NA", Gain = "NA", Cover = "NA")
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
as.data.table(xtr)
|
as.data.table(xtr)
|
||||||
@ -152,7 +152,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
|||||||
|
|
||||||
# parse leaf lines
|
# parse leaf lines
|
||||||
leaf_rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
leaf_rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
||||||
leaf_cols <- c("Feature", "Quality", "Cover")
|
leaf_cols <- c("Feature", "Gain", "Cover")
|
||||||
td[
|
td[
|
||||||
isLeaf == TRUE,
|
isLeaf == TRUE,
|
||||||
(leaf_cols) := {
|
(leaf_cols) := {
|
||||||
@ -167,7 +167,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
|||||||
]
|
]
|
||||||
|
|
||||||
# convert some columns to numeric
|
# convert some columns to numeric
|
||||||
numeric_cols <- c("Split", "Quality", "Cover")
|
numeric_cols <- c("Split", "Gain", "Cover")
|
||||||
td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]
|
td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]
|
||||||
if (use_int_id) {
|
if (use_int_id) {
|
||||||
int_cols <- c("Yes", "No", "Missing")
|
int_cols <- c("Yes", "No", "Missing")
|
||||||
|
|||||||
@ -92,7 +92,7 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d
|
|||||||
stop("Model tree columns are not as expected!\n",
|
stop("Model tree columns are not as expected!\n",
|
||||||
" Note that this function works only for tree models.")
|
" Note that this function works only for tree models.")
|
||||||
|
|
||||||
dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Quality)], by = "ID")
|
dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Gain)], by = "ID")
|
||||||
setkeyv(dt_depths, c("Tree", "ID"))
|
setkeyv(dt_depths, c("Tree", "ID"))
|
||||||
# count by depth levels, and also calculate average cover at a depth
|
# count by depth levels, and also calculate average cover at a depth
|
||||||
dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]
|
dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]
|
||||||
@ -157,6 +157,6 @@ get.leaf.depth <- function(dt_tree) {
|
|||||||
# They are mainly column names inferred by Data.table...
|
# They are mainly column names inferred by Data.table...
|
||||||
globalVariables(
|
globalVariables(
|
||||||
c(
|
c(
|
||||||
".N", "N", "Depth", "Quality", "Cover", "Tree", "ID", "Yes", "No", "Feature", "Leaf", "Weight"
|
".N", "N", "Depth", "Gain", "Cover", "Tree", "ID", "Yes", "No", "Feature", "Leaf", "Weight"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@ -95,13 +95,13 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
|
|||||||
data.table::set(tree.matrix, j = nm, value = sub("^\\d+-", "", tree.matrix[[nm]]))
|
data.table::set(tree.matrix, j = nm, value = sub("^\\d+-", "", tree.matrix[[nm]]))
|
||||||
|
|
||||||
nodes.dt <- tree.matrix[
|
nodes.dt <- tree.matrix[
|
||||||
, .(Quality = sum(Quality))
|
, .(Gain = sum(Gain))
|
||||||
, by = .(abs.node.position, Feature)
|
, by = .(abs.node.position, Feature)
|
||||||
][, .(Text = paste0(
|
][, .(Text = paste0(
|
||||||
paste0(
|
paste0(
|
||||||
Feature[seq_len(min(length(Feature), features_keep))],
|
Feature[seq_len(min(length(Feature), features_keep))],
|
||||||
" (",
|
" (",
|
||||||
format(Quality[seq_len(min(length(Quality), features_keep))], digits = 5),
|
format(Gain[seq_len(min(length(Gain), features_keep))], digits = 5),
|
||||||
")"
|
")"
|
||||||
),
|
),
|
||||||
collapse = "\n"
|
collapse = "\n"
|
||||||
|
|||||||
@ -132,7 +132,7 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
|
|||||||
|
|
||||||
dt <- xgb.model.dt.tree(feature_names = feature_names, model = model, trees = trees)
|
dt <- xgb.model.dt.tree(feature_names = feature_names, model = model, trees = trees)
|
||||||
|
|
||||||
dt[, label := paste0(Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "\nValue: ", "\nGain: "), Quality)]
|
dt[, label := paste0(Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "\nValue: ", "\nGain: "), Gain)]
|
||||||
if (show_node_id)
|
if (show_node_id)
|
||||||
dt[, label := paste0(ID, ": ", label)]
|
dt[, label := paste0(ID, ": ", label)]
|
||||||
dt[Node == 0, label := paste0("Tree ", Tree, "\n", label)]
|
dt[Node == 0, label := paste0("Tree ", Tree, "\n", label)]
|
||||||
@ -199,4 +199,4 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
|
|||||||
# Avoid error messages during CRAN check.
|
# Avoid error messages during CRAN check.
|
||||||
# The reason is that these variables are never declared
|
# The reason is that these variables are never declared
|
||||||
# They are mainly column names inferred by Data.table...
|
# They are mainly column names inferred by Data.table...
|
||||||
globalVariables(c("Feature", "ID", "Cover", "Quality", "Split", "Yes", "No", "Missing", ".", "shape", "filledcolor", "label"))
|
globalVariables(c("Feature", "ID", "Cover", "Gain", "Split", "Yes", "No", "Missing", ".", "shape", "filledcolor", "label"))
|
||||||
|
|||||||
@ -46,7 +46,7 @@ for a leaf node, it simply labels it as \code{"Leaf"}.
|
|||||||
\item \code{Yes}: ID of the next node when the split condition is met.
|
\item \code{Yes}: ID of the next node when the split condition is met.
|
||||||
\item \code{No}: ID of the next node when the split condition is not met.
|
\item \code{No}: ID of the next node when the split condition is not met.
|
||||||
\item \code{Missing}: ID of the next node when the branch value is missing.
|
\item \code{Missing}: ID of the next node when the branch value is missing.
|
||||||
\item \code{Quality}: either the split gain (change in loss) or the leaf value.
|
\item \code{Gain}: either the split gain (change in loss) or the leaf value.
|
||||||
\item \code{Cover}: metric related to the number of observations either seen by a split
|
\item \code{Cover}: metric related to the number of observations either seen by a split
|
||||||
or collected by a leaf during training.
|
or collected by a leaf during training.
|
||||||
}
|
}
|
||||||
|
|||||||
@ -275,7 +275,7 @@ test_that("xgb.Booster serializing as R object works", {
|
|||||||
|
|
||||||
test_that("xgb.model.dt.tree works with and without feature names", {
|
test_that("xgb.model.dt.tree works with and without feature names", {
|
||||||
.skip_if_vcd_not_available()
|
.skip_if_vcd_not_available()
|
||||||
names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
|
names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
|
||||||
dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree)
|
dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree)
|
||||||
expect_equal(names.dt.trees, names(dt.tree))
|
expect_equal(names.dt.trees, names(dt.tree))
|
||||||
if (!flag_32bit)
|
if (!flag_32bit)
|
||||||
@ -341,7 +341,7 @@ test_that("xgb.importance works with and without feature names", {
|
|||||||
trees = trees
|
trees = trees
|
||||||
)[
|
)[
|
||||||
Feature != "Leaf", .(
|
Feature != "Leaf", .(
|
||||||
Gain = sum(Quality),
|
Gain = sum(Gain),
|
||||||
Cover = sum(Cover),
|
Cover = sum(Cover),
|
||||||
Frequency = .N
|
Frequency = .N
|
||||||
),
|
),
|
||||||
|
|||||||
@ -53,9 +53,9 @@ test_that("updating the model works", {
|
|||||||
# should be the same evaluation but different gains and larger cover
|
# should be the same evaluation but different gains and larger cover
|
||||||
expect_equal(bst2$evaluation_log, bst2r$evaluation_log)
|
expect_equal(bst2$evaluation_log, bst2r$evaluation_log)
|
||||||
if (!win32_flag) {
|
if (!win32_flag) {
|
||||||
expect_equal(tr2[Feature == 'Leaf']$Quality, tr2r[Feature == 'Leaf']$Quality)
|
expect_equal(tr2[Feature == 'Leaf']$Gain, tr2r[Feature == 'Leaf']$Gain)
|
||||||
}
|
}
|
||||||
expect_gt(sum(abs(tr2[Feature != 'Leaf']$Quality - tr2r[Feature != 'Leaf']$Quality)), 100)
|
expect_gt(sum(abs(tr2[Feature != 'Leaf']$Gain - tr2r[Feature != 'Leaf']$Gain)), 100)
|
||||||
expect_gt(sum(tr2r$Cover) / sum(tr2$Cover), 1.5)
|
expect_gt(sum(tr2r$Cover) / sum(tr2$Cover), 1.5)
|
||||||
|
|
||||||
# process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
|
# process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
|
||||||
@ -72,8 +72,8 @@ test_that("updating the model works", {
|
|||||||
tr2u <- xgb.model.dt.tree(model = bst2u)
|
tr2u <- xgb.model.dt.tree(model = bst2u)
|
||||||
# should be the same evaluation but different gains and larger cover
|
# should be the same evaluation but different gains and larger cover
|
||||||
expect_equal(bst2$evaluation_log, bst2u$evaluation_log)
|
expect_equal(bst2$evaluation_log, bst2u$evaluation_log)
|
||||||
expect_equal(tr2[Feature == 'Leaf']$Quality, tr2u[Feature == 'Leaf']$Quality)
|
expect_equal(tr2[Feature == 'Leaf']$Gain, tr2u[Feature == 'Leaf']$Gain)
|
||||||
expect_gt(sum(abs(tr2[Feature != 'Leaf']$Quality - tr2u[Feature != 'Leaf']$Quality)), 100)
|
expect_gt(sum(abs(tr2[Feature != 'Leaf']$Gain - tr2u[Feature != 'Leaf']$Gain)), 100)
|
||||||
expect_gt(sum(tr2u$Cover) / sum(tr2$Cover), 1.5)
|
expect_gt(sum(tr2u$Cover) / sum(tr2$Cover), 1.5)
|
||||||
# the results should be the same as for the model with an extra 'refresh' updater
|
# the results should be the same as for the model with an extra 'refresh' updater
|
||||||
expect_equal(bst2r$evaluation_log, bst2u$evaluation_log)
|
expect_equal(bst2r$evaluation_log, bst2u$evaluation_log)
|
||||||
@ -87,8 +87,8 @@ test_that("updating the model works", {
|
|||||||
tr1ut <- xgb.model.dt.tree(model = bst1ut)
|
tr1ut <- xgb.model.dt.tree(model = bst1ut)
|
||||||
# should be the same evaluations but different gains and smaller cover (test data is smaller)
|
# should be the same evaluations but different gains and smaller cover (test data is smaller)
|
||||||
expect_equal(bst1$evaluation_log, bst1ut$evaluation_log)
|
expect_equal(bst1$evaluation_log, bst1ut$evaluation_log)
|
||||||
expect_equal(tr1[Feature == 'Leaf']$Quality, tr1ut[Feature == 'Leaf']$Quality)
|
expect_equal(tr1[Feature == 'Leaf']$Gain, tr1ut[Feature == 'Leaf']$Gain)
|
||||||
expect_gt(sum(abs(tr1[Feature != 'Leaf']$Quality - tr1ut[Feature != 'Leaf']$Quality)), 100)
|
expect_gt(sum(abs(tr1[Feature != 'Leaf']$Gain - tr1ut[Feature != 'Leaf']$Gain)), 100)
|
||||||
expect_lt(sum(tr1ut$Cover) / sum(tr1$Cover), 0.5)
|
expect_lt(sum(tr1ut$Cover) / sum(tr1$Cover), 0.5)
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -111,7 +111,7 @@ test_that("updating works for multiclass & multitree", {
|
|||||||
|
|
||||||
# should be the same evaluation but different gains and larger cover
|
# should be the same evaluation but different gains and larger cover
|
||||||
expect_equal(bst0$evaluation_log, bst0u$evaluation_log)
|
expect_equal(bst0$evaluation_log, bst0u$evaluation_log)
|
||||||
expect_equal(tr0[Feature == 'Leaf']$Quality, tr0u[Feature == 'Leaf']$Quality)
|
expect_equal(tr0[Feature == 'Leaf']$Gain, tr0u[Feature == 'Leaf']$Gain)
|
||||||
expect_gt(sum(abs(tr0[Feature != 'Leaf']$Quality - tr0u[Feature != 'Leaf']$Quality)), 100)
|
expect_gt(sum(abs(tr0[Feature != 'Leaf']$Gain - tr0u[Feature != 'Leaf']$Gain)), 100)
|
||||||
expect_gt(sum(tr0u$Cover) / sum(tr0$Cover), 1.5)
|
expect_gt(sum(tr0u$Cover) / sum(tr0$Cover), 1.5)
|
||||||
})
|
})
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user