[R-package] remove dependency on {magrittr} (#6928)
Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
parent
44cc9c04ea
commit
894e9bc5d4
2
.github/workflows/r_nold.yml
vendored
2
.github/workflows/r_nold.yml
vendored
@ -8,7 +8,7 @@ on:
|
||||
types: [created]
|
||||
|
||||
env:
|
||||
R_PACKAGES: c('XML', 'igraph', 'data.table', 'magrittr', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
||||
R_PACKAGES: c('XML', 'igraph', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
||||
|
||||
jobs:
|
||||
test-R-noLD:
|
||||
|
||||
2
.github/workflows/r_tests.yml
vendored
2
.github/workflows/r_tests.yml
vendored
@ -3,7 +3,7 @@ name: XGBoost-R-Tests
|
||||
on: [push, pull_request]
|
||||
|
||||
env:
|
||||
R_PACKAGES: c('XML', 'igraph', 'data.table', 'magrittr', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
||||
R_PACKAGES: c('XML', 'igraph', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
|
||||
|
||||
jobs:
|
||||
lintr:
|
||||
|
||||
@ -62,7 +62,6 @@ Imports:
|
||||
Matrix (>= 1.1-0),
|
||||
methods,
|
||||
data.table (>= 1.9.6),
|
||||
magrittr (>= 1.5),
|
||||
jsonlite (>= 1.0),
|
||||
RoxygenNote: 7.1.1
|
||||
SystemRequirements: GNU make, C++14
|
||||
|
||||
@ -82,7 +82,6 @@ importFrom(graphics,points)
|
||||
importFrom(graphics,title)
|
||||
importFrom(jsonlite,fromJSON)
|
||||
importFrom(jsonlite,toJSON)
|
||||
importFrom(magrittr,"%>%")
|
||||
importFrom(stats,median)
|
||||
importFrom(stats,predict)
|
||||
importFrom(utils,head)
|
||||
|
||||
@ -642,8 +642,13 @@ cb.gblinear.history <- function(sparse=FALSE) {
|
||||
coefs <<- list2mat(coefs)
|
||||
} else { # xgb.cv:
|
||||
# first lapply transposes the list
|
||||
coefs <<- lapply(seq_along(coefs[[1]]), function(i) lapply(coefs, "[[", i)) %>%
|
||||
lapply(function(x) list2mat(x))
|
||||
coefs <<- lapply(
|
||||
X = lapply(
|
||||
X = seq_along(coefs[[1]]),
|
||||
FUN = function(i) lapply(coefs, "[[", i)
|
||||
),
|
||||
FUN = function(x) list2mat(x)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -372,8 +372,14 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
} else if (n_group == 1) {
|
||||
matrix(ret, nrow = n_row, byrow = TRUE, dimnames = list(NULL, cnames))
|
||||
} else {
|
||||
arr <- array(ret, c(n_col1, n_group, n_row),
|
||||
dimnames = list(cnames, NULL, NULL)) %>% aperm(c(2, 3, 1)) # [group, row, col]
|
||||
arr <- aperm(
|
||||
a = array(
|
||||
data = ret,
|
||||
dim = c(n_col1, n_group, n_row),
|
||||
dimnames = list(cnames, NULL, NULL)
|
||||
),
|
||||
perm = c(2, 3, 1) # [group, row, col]
|
||||
)
|
||||
lapply(seq_len(n_group), function(g) arr[g, , ])
|
||||
}
|
||||
} else if (predinteraction) {
|
||||
@ -383,10 +389,23 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
ret <- if (n_ret == n_row) {
|
||||
matrix(ret, ncol = 1, dimnames = list(NULL, cnames))
|
||||
} else if (n_group == 1) {
|
||||
array(ret, c(n_col1, n_col1, n_row), dimnames = list(cnames, cnames, NULL)) %>% aperm(c(3, 1, 2))
|
||||
aperm(
|
||||
a = array(
|
||||
data = ret,
|
||||
dim = c(n_col1, n_col1, n_row),
|
||||
dimnames = list(cnames, cnames, NULL)
|
||||
),
|
||||
perm = c(3, 1, 2)
|
||||
)
|
||||
} else {
|
||||
arr <- array(ret, c(n_col1, n_col1, n_group, n_row),
|
||||
dimnames = list(cnames, cnames, NULL, NULL)) %>% aperm(c(3, 4, 1, 2)) # [group, row, col1, col2]
|
||||
arr <- aperm(
|
||||
a = array(
|
||||
data = ret,
|
||||
dim = c(n_col1, n_col1, n_group, n_row),
|
||||
dimnames = list(cnames, cnames, NULL, NULL)
|
||||
),
|
||||
perm = c(3, 4, 1, 2) # [group, row, col1, col2]
|
||||
)
|
||||
lapply(seq_len(n_group), function(g) arr[g, , , ])
|
||||
}
|
||||
} else if (reshape && npred_per_case > 1) {
|
||||
|
||||
@ -100,9 +100,10 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
|
||||
|
||||
# linear model
|
||||
if (model_text_dump[2] == "bias:"){
|
||||
weights <- which(model_text_dump == "weight:") %>%
|
||||
{model_text_dump[(. + 1):length(model_text_dump)]} %>%
|
||||
as.numeric
|
||||
weight_index <- which(model_text_dump == "weight:") + 1
|
||||
weights <- as.numeric(
|
||||
model_text_dump[weight_index:length(model_text_dump)]
|
||||
)
|
||||
|
||||
num_class <- NVL(model$params$num_class, 1)
|
||||
if (is.null(feature_names))
|
||||
|
||||
@ -75,8 +75,8 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
|
||||
while (tree.matrix[, sum(is.na(abs.node.position))] > 0) {
|
||||
yes.row.nodes <- tree.matrix[abs.node.position %in% precedent.nodes & !is.na(Yes)]
|
||||
no.row.nodes <- tree.matrix[abs.node.position %in% precedent.nodes & !is.na(No)]
|
||||
yes.nodes.abs.pos <- yes.row.nodes[, abs.node.position] %>% paste0("_0")
|
||||
no.nodes.abs.pos <- no.row.nodes[, abs.node.position] %>% paste0("_1")
|
||||
yes.nodes.abs.pos <- paste0(yes.row.nodes[, abs.node.position], "_0")
|
||||
no.nodes.abs.pos <- paste0(no.row.nodes[, abs.node.position], "_1")
|
||||
|
||||
tree.matrix[ID %in% yes.row.nodes[, Yes], abs.node.position := yes.nodes.abs.pos]
|
||||
tree.matrix[ID %in% no.row.nodes[, No], abs.node.position := no.nodes.abs.pos]
|
||||
@ -92,19 +92,28 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
|
||||
nodes.dt <- tree.matrix[
|
||||
, .(Quality = sum(Quality))
|
||||
, by = .(abs.node.position, Feature)
|
||||
][, .(Text = paste0(Feature[1:min(length(Feature), features_keep)],
|
||||
" (",
|
||||
format(Quality[1:min(length(Quality), features_keep)], digits = 5),
|
||||
")") %>%
|
||||
paste0(collapse = "\n"))
|
||||
, by = abs.node.position]
|
||||
][, .(Text = paste0(
|
||||
paste0(
|
||||
Feature[1:min(length(Feature), features_keep)],
|
||||
" (",
|
||||
format(Quality[1:min(length(Quality), features_keep)], digits = 5),
|
||||
")"
|
||||
),
|
||||
collapse = "\n"
|
||||
)
|
||||
)
|
||||
, by = abs.node.position
|
||||
]
|
||||
|
||||
edges.dt <- tree.matrix[Feature != "Leaf", .(abs.node.position, Yes)] %>%
|
||||
list(tree.matrix[Feature != "Leaf", .(abs.node.position, No)]) %>%
|
||||
rbindlist() %>%
|
||||
setnames(c("From", "To")) %>%
|
||||
.[, .N, .(From, To)] %>%
|
||||
.[, N := NULL]
|
||||
edges.dt <- data.table::rbindlist(
|
||||
l = list(
|
||||
tree.matrix[Feature != "Leaf", .(abs.node.position, Yes)],
|
||||
tree.matrix[Feature != "Leaf", .(abs.node.position, No)]
|
||||
)
|
||||
)
|
||||
data.table::setnames(edges.dt, c("From", "To"))
|
||||
edges.dt <- edges.dt[, .N, .(From, To)]
|
||||
edges.dt[, N := NULL]
|
||||
|
||||
nodes <- DiagrammeR::create_node_df(
|
||||
n = nrow(nodes.dt),
|
||||
@ -120,21 +129,25 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
|
||||
nodes_df = nodes,
|
||||
edges_df = edges,
|
||||
attr_theme = NULL
|
||||
) %>%
|
||||
DiagrammeR::add_global_graph_attrs(
|
||||
)
|
||||
graph <- DiagrammeR::add_global_graph_attrs(
|
||||
graph = graph,
|
||||
attr_type = "graph",
|
||||
attr = c("layout", "rankdir"),
|
||||
value = c("dot", "LR")
|
||||
) %>%
|
||||
DiagrammeR::add_global_graph_attrs(
|
||||
)
|
||||
graph <- DiagrammeR::add_global_graph_attrs(
|
||||
graph = graph,
|
||||
attr_type = "node",
|
||||
attr = c("color", "fillcolor", "style", "shape", "fontname"),
|
||||
value = c("DimGray", "beige", "filled", "rectangle", "Helvetica")
|
||||
) %>%
|
||||
DiagrammeR::add_global_graph_attrs(
|
||||
)
|
||||
graph <- DiagrammeR::add_global_graph_attrs(
|
||||
graph = graph,
|
||||
attr_type = "edge",
|
||||
attr = c("color", "arrowsize", "arrowhead", "fontname"),
|
||||
value = c("DimGray", "1.5", "vee", "Helvetica"))
|
||||
value = c("DimGray", "1.5", "vee", "Helvetica")
|
||||
)
|
||||
|
||||
if (!render) return(invisible(graph))
|
||||
|
||||
|
||||
@ -99,33 +99,41 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
|
||||
fontcolor = "black")
|
||||
|
||||
edges <- DiagrammeR::create_edge_df(
|
||||
from = match(dt[Feature != "Leaf", c(ID)] %>% rep(2), dt$ID),
|
||||
from = match(rep(dt[Feature != "Leaf", c(ID)], 2), dt$ID),
|
||||
to = match(dt[Feature != "Leaf", c(Yes, No)], dt$ID),
|
||||
label = dt[Feature != "Leaf", paste("<", Split)] %>%
|
||||
c(rep("", nrow(dt[Feature != "Leaf"]))),
|
||||
style = dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")] %>%
|
||||
c(dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]),
|
||||
label = c(
|
||||
dt[Feature != "Leaf", paste("<", Split)],
|
||||
rep("", nrow(dt[Feature != "Leaf"]))
|
||||
),
|
||||
style = c(
|
||||
dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")],
|
||||
dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]
|
||||
),
|
||||
rel = "leading_to")
|
||||
|
||||
graph <- DiagrammeR::create_graph(
|
||||
nodes_df = nodes,
|
||||
edges_df = edges,
|
||||
attr_theme = NULL
|
||||
) %>%
|
||||
DiagrammeR::add_global_graph_attrs(
|
||||
)
|
||||
graph <- DiagrammeR::add_global_graph_attrs(
|
||||
graph = graph,
|
||||
attr_type = "graph",
|
||||
attr = c("layout", "rankdir"),
|
||||
value = c("dot", "LR")
|
||||
) %>%
|
||||
DiagrammeR::add_global_graph_attrs(
|
||||
)
|
||||
graph <- DiagrammeR::add_global_graph_attrs(
|
||||
graph = graph,
|
||||
attr_type = "node",
|
||||
attr = c("color", "style", "fontname"),
|
||||
value = c("DimGray", "filled", "Helvetica")
|
||||
) %>%
|
||||
DiagrammeR::add_global_graph_attrs(
|
||||
)
|
||||
graph <- DiagrammeR::add_global_graph_attrs(
|
||||
graph = graph,
|
||||
attr_type = "edge",
|
||||
attr = c("color", "arrowsize", "arrowhead", "fontname"),
|
||||
value = c("DimGray", "1.5", "vee", "Helvetica"))
|
||||
value = c("DimGray", "1.5", "vee", "Helvetica")
|
||||
)
|
||||
|
||||
if (!render) return(invisible(graph))
|
||||
|
||||
|
||||
@ -90,7 +90,6 @@ NULL
|
||||
#' @importFrom data.table setkey
|
||||
#' @importFrom data.table setkeyv
|
||||
#' @importFrom data.table setnames
|
||||
#' @importFrom magrittr %>%
|
||||
#' @importFrom jsonlite fromJSON
|
||||
#' @importFrom jsonlite toJSON
|
||||
#' @importFrom utils object.size str tail
|
||||
|
||||
@ -110,7 +110,7 @@ test_that("predict feature contributions works", {
|
||||
pred <- predict(bst.GLM, sparse_matrix, outputmargin = TRUE)
|
||||
expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
|
||||
# manual calculation of linear terms
|
||||
coefs <- xgb.dump(bst.GLM)[-c(1, 2, 4)] %>% as.numeric
|
||||
coefs <- as.numeric(xgb.dump(bst.GLM)[-c(1, 2, 4)])
|
||||
coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
|
||||
pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN = "*")
|
||||
expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual),
|
||||
@ -130,7 +130,11 @@ test_that("predict feature contributions works", {
|
||||
pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
|
||||
pred_contr <- predict(mbst.GLM, as.matrix(iris[, -5]), predcontrib = TRUE)
|
||||
expect_length(pred_contr, 3)
|
||||
coefs_all <- xgb.dump(mbst.GLM)[-c(1, 2, 6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
|
||||
coefs_all <- matrix(
|
||||
data = as.numeric(xgb.dump(mbst.GLM)[-c(1, 2, 6)]),
|
||||
ncol = 3,
|
||||
byrow = TRUE
|
||||
)
|
||||
for (g in seq_along(pred_contr)) {
|
||||
expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
|
||||
expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
context('Test prediction of feature interactions')
|
||||
|
||||
require(xgboost)
|
||||
require(magrittr)
|
||||
|
||||
set.seed(123)
|
||||
|
||||
@ -32,7 +31,7 @@ test_that("predict feature interactions works", {
|
||||
cont <- predict(b, dm, predcontrib = TRUE)
|
||||
expect_equal(dim(cont), c(N, P + 1))
|
||||
# make sure for each row they add up to marginal predictions
|
||||
max(abs(rowSums(cont) - pred)) %>% expect_lt(0.001)
|
||||
expect_lt(max(abs(rowSums(cont) - pred)), 0.001)
|
||||
# Hand-construct the 'ground truth' feature contributions:
|
||||
gt_cont <- cbind(
|
||||
2. * X[, 1],
|
||||
@ -52,21 +51,24 @@ test_that("predict feature interactions works", {
|
||||
expect_equal(dimnames(intr), list(NULL, cn, cn))
|
||||
|
||||
# check the symmetry
|
||||
max(abs(aperm(intr, c(1, 3, 2)) - intr)) %>% expect_lt(0.00001)
|
||||
expect_lt(max(abs(aperm(intr, c(1, 3, 2)) - intr)), 0.00001)
|
||||
|
||||
# sums WRT columns must be close to feature contributions
|
||||
max(abs(apply(intr, c(1, 2), sum) - cont)) %>% expect_lt(0.00001)
|
||||
expect_lt(max(abs(apply(intr, c(1, 2), sum) - cont)), 0.00001)
|
||||
|
||||
# diagonal terms for features 3,4,5 must be close to zero
|
||||
Reduce(max, sapply(3:P, function(i) max(abs(intr[, i, i])))) %>% expect_lt(0.05)
|
||||
expect_lt(Reduce(max, sapply(3:P, function(i) max(abs(intr[, i, i])))), 0.05)
|
||||
|
||||
# BIAS must have no interactions
|
||||
max(abs(intr[, 1:P, P + 1])) %>% expect_lt(0.00001)
|
||||
expect_lt(max(abs(intr[, 1:P, P + 1])), 0.00001)
|
||||
|
||||
# interactions other than 2 x 3 must be close to zero
|
||||
intr23 <- intr
|
||||
intr23[, 2, 3] <- 0
|
||||
Reduce(max, sapply(1:P, function(i) max(abs(intr23[, i, (i + 1):(P + 1)])))) %>% expect_lt(0.05)
|
||||
expect_lt(
|
||||
Reduce(max, sapply(1:P, function(i) max(abs(intr23[, i, (i + 1):(P + 1)])))),
|
||||
0.05
|
||||
)
|
||||
|
||||
# Construct the 'ground truth' contributions of interactions directly from the linear terms:
|
||||
gt_intr <- array(0, c(N, P + 1, P + 1))
|
||||
@ -119,23 +121,39 @@ test_that("multiclass feature interactions work", {
|
||||
dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
|
||||
param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
|
||||
b <- xgb.train(param, dm, 40)
|
||||
pred <- predict(b, dm, outputmargin = TRUE) %>% array(c(3, 150)) %>% t
|
||||
pred <- t(
|
||||
array(
|
||||
data = predict(b, dm, outputmargin = TRUE),
|
||||
dim = c(3, 150)
|
||||
)
|
||||
)
|
||||
|
||||
# SHAP contributions:
|
||||
cont <- predict(b, dm, predcontrib = TRUE)
|
||||
expect_length(cont, 3)
|
||||
# rewrap them as a 3d array
|
||||
cont <- unlist(cont) %>% array(c(150, 5, 3))
|
||||
cont <- array(
|
||||
data = unlist(cont),
|
||||
dim = c(150, 5, 3)
|
||||
)
|
||||
|
||||
# make sure for each row they add up to marginal predictions
|
||||
max(abs(apply(cont, c(1, 3), sum) - pred)) %>% expect_lt(0.001)
|
||||
expect_lt(max(abs(apply(cont, c(1, 3), sum) - pred)), 0.001)
|
||||
|
||||
# SHAP interaction contributions:
|
||||
intr <- predict(b, dm, predinteraction = TRUE)
|
||||
expect_length(intr, 3)
|
||||
# rewrap them as a 4d array
|
||||
intr <- unlist(intr) %>% array(c(150, 5, 5, 3)) %>% aperm(c(4, 1, 2, 3)) # [grp, row, col, col]
|
||||
intr <- aperm(
|
||||
a = array(
|
||||
data = unlist(intr),
|
||||
dim = c(150, 5, 5, 3)
|
||||
),
|
||||
perm = c(4, 1, 2, 3) # [grp, row, col, col]
|
||||
)
|
||||
|
||||
# check the symmetry
|
||||
max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)) %>% expect_lt(0.00001)
|
||||
expect_lt(max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)), 0.00001)
|
||||
# sums WRT columns must be close to feature contributions
|
||||
max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))) %>% expect_lt(0.00001)
|
||||
expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))), 0.00001)
|
||||
})
|
||||
|
||||
@ -27,7 +27,7 @@ file(WRITE "${build_dir}/R-package/src/Makevars.win" "all:")
|
||||
|
||||
# Install dependencies
|
||||
set(XGB_DEPS_SCRIPT
|
||||
"deps = setdiff(c('data.table', 'jsonlite', 'magrittr', 'Matrix'), rownames(installed.packages())); if(length(deps)>0) install.packages(deps, repo = 'https://cloud.r-project.org/')")
|
||||
"deps = setdiff(c('data.table', 'jsonlite', 'Matrix'), rownames(installed.packages())); if(length(deps)>0) install.packages(deps, repo = 'https://cloud.r-project.org/')")
|
||||
check_call(COMMAND "${LIBR_EXECUTABLE}" -q -e "${XGB_DEPS_SCRIPT}")
|
||||
|
||||
# Install the XGBoost R package
|
||||
|
||||
@ -68,7 +68,7 @@ R
|
||||
.. code-block:: bash
|
||||
|
||||
# Install dependencies
|
||||
R -q -e "install.packages(c('data.table', 'magrittr', 'jsonlite'))"
|
||||
R -q -e "install.packages(c('data.table', 'jsonlite'))"
|
||||
# Install XGBoost
|
||||
R CMD INSTALL ./xgboost_r_gpu_linux.tar.gz
|
||||
|
||||
@ -149,7 +149,7 @@ ID you want to install: ``xgboost_r_gpu_linux_[commit].tar.gz``, download it the
|
||||
.. code-block:: bash
|
||||
|
||||
# Install dependencies
|
||||
R -q -e "install.packages(c('data.table', 'magrittr', 'jsonlite', 'remotes'))"
|
||||
R -q -e "install.packages(c('data.table', 'jsonlite', 'remotes'))"
|
||||
# Install XGBoost
|
||||
R CMD INSTALL ./xgboost_r_gpu_linux.tar.gz
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user