From 894e9bc5d448e506e28a903970a9c56cfd16b914 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 12 May 2021 15:34:59 -0500 Subject: [PATCH] [R-package] remove dependency on {magrittr} (#6928) Co-authored-by: Hyunsu Cho --- .github/workflows/r_nold.yml | 2 +- .github/workflows/r_tests.yml | 2 +- R-package/DESCRIPTION | 1 - R-package/NAMESPACE | 1 - R-package/R/callbacks.R | 9 +++- R-package/R/xgb.Booster.R | 29 +++++++++-- R-package/R/xgb.importance.R | 7 +-- R-package/R/xgb.plot.multi.trees.R | 55 ++++++++++++-------- R-package/R/xgb.plot.tree.R | 32 +++++++----- R-package/R/xgboost.R | 1 - R-package/tests/testthat/test_helpers.R | 8 ++- R-package/tests/testthat/test_interactions.R | 44 +++++++++++----- cmake/RPackageInstall.cmake.in | 2 +- doc/install.rst | 4 +- 14 files changed, 131 insertions(+), 66 deletions(-) diff --git a/.github/workflows/r_nold.yml b/.github/workflows/r_nold.yml index 21d7fd875..554b93571 100644 --- a/.github/workflows/r_nold.yml +++ b/.github/workflows/r_nold.yml @@ -8,7 +8,7 @@ on: types: [created] env: - R_PACKAGES: c('XML', 'igraph', 'data.table', 'magrittr', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic') + R_PACKAGES: c('XML', 'igraph', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic') jobs: test-R-noLD: diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml index c3c50894f..58e0e629a 100644 --- a/.github/workflows/r_tests.yml +++ b/.github/workflows/r_tests.yml @@ -3,7 +3,7 @@ name: XGBoost-R-Tests on: [push, pull_request] env: - R_PACKAGES: c('XML', 'igraph', 'data.table', 'magrittr', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic') + R_PACKAGES: c('XML', 'igraph', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic') jobs: lintr: diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 1dc05165f..7f0418507 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -62,7 +62,6 @@ Imports: Matrix (>= 1.1-0), methods, data.table (>= 1.9.6), - magrittr (>= 1.5), jsonlite (>= 1.0), RoxygenNote: 7.1.1 SystemRequirements: GNU make, C++14 diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index f12f92b9a..bbb5ee225 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -82,7 +82,6 @@ importFrom(graphics,points) importFrom(graphics,title) importFrom(jsonlite,fromJSON) importFrom(jsonlite,toJSON) -importFrom(magrittr,"%>%") importFrom(stats,median) importFrom(stats,predict) importFrom(utils,head) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index c3ea2acaf..2c1b2ecaa 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -642,8 +642,13 @@ cb.gblinear.history <- function(sparse=FALSE) { coefs <<- list2mat(coefs) } else { # xgb.cv: # first lapply transposes the list - coefs <<- lapply(seq_along(coefs[[1]]), function(i) lapply(coefs, "[[", i)) %>% - lapply(function(x) list2mat(x)) + coefs <<- lapply( + X = lapply( + X = seq_along(coefs[[1]]), + FUN = function(i) lapply(coefs, "[[", i) + ), + FUN = function(x) list2mat(x) + ) } } diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 487e6957f..af243e302 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -372,8 +372,14 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA } else if (n_group == 1) { matrix(ret, nrow = n_row, byrow = TRUE, dimnames = list(NULL, cnames)) } else { - arr <- array(ret, c(n_col1, n_group, n_row), - dimnames = list(cnames, NULL, NULL)) %>% aperm(c(2, 3, 1)) # [group, row, col] + arr <- aperm( + a = array( + data = ret, + dim = c(n_col1, n_group, n_row), + dimnames = list(cnames, NULL, NULL) + ), + perm = c(2, 3, 1) # [group, row, col] + ) lapply(seq_len(n_group), function(g) arr[g, , ]) } } else if (predinteraction) { @@ -383,10 +389,23 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA ret <- if (n_ret == n_row) { matrix(ret, ncol = 1, dimnames = list(NULL, cnames)) } else if (n_group == 1) { - array(ret, c(n_col1, n_col1, n_row), dimnames = list(cnames, cnames, NULL)) %>% aperm(c(3, 1, 2)) + aperm( + a = array( + data = ret, + dim = c(n_col1, n_col1, n_row), + dimnames = list(cnames, cnames, NULL) + ), + perm = c(3, 1, 2) + ) } else { - arr <- array(ret, c(n_col1, n_col1, n_group, n_row), - dimnames = list(cnames, cnames, NULL, NULL)) %>% aperm(c(3, 4, 1, 2)) # [group, row, col1, col2] + arr <- aperm( + a = array( + data = ret, + dim = c(n_col1, n_col1, n_group, n_row), + dimnames = list(cnames, cnames, NULL, NULL) + ), + perm = c(3, 4, 1, 2) # [group, row, col1, col2] + ) lapply(seq_len(n_group), function(g) arr[g, , , ]) } } else if (reshape && npred_per_case > 1) { diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R index b1c59d98d..7305ee571 100644 --- a/R-package/R/xgb.importance.R +++ b/R-package/R/xgb.importance.R @@ -100,9 +100,10 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL, # linear model if (model_text_dump[2] == "bias:"){ - weights <- which(model_text_dump == "weight:") %>% - {model_text_dump[(. + 1):length(model_text_dump)]} %>% - as.numeric + weight_index <- which(model_text_dump == "weight:") + 1 + weights <- as.numeric( + model_text_dump[weight_index:length(model_text_dump)] + ) num_class <- NVL(model$params$num_class, 1) if (is.null(feature_names)) diff --git a/R-package/R/xgb.plot.multi.trees.R b/R-package/R/xgb.plot.multi.trees.R index c884281fc..063f1034a 100644 --- a/R-package/R/xgb.plot.multi.trees.R +++ b/R-package/R/xgb.plot.multi.trees.R @@ -75,8 +75,8 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, while (tree.matrix[, sum(is.na(abs.node.position))] > 0) { yes.row.nodes <- tree.matrix[abs.node.position %in% precedent.nodes & !is.na(Yes)] no.row.nodes <- tree.matrix[abs.node.position %in% precedent.nodes & !is.na(No)] - yes.nodes.abs.pos <- yes.row.nodes[, abs.node.position] %>% paste0("_0") - no.nodes.abs.pos <- no.row.nodes[, abs.node.position] %>% paste0("_1") + yes.nodes.abs.pos <- paste0(yes.row.nodes[, abs.node.position], "_0") + no.nodes.abs.pos <- paste0(no.row.nodes[, abs.node.position], "_1") tree.matrix[ID %in% yes.row.nodes[, Yes], abs.node.position := yes.nodes.abs.pos] tree.matrix[ID %in% no.row.nodes[, No], abs.node.position := no.nodes.abs.pos] @@ -92,19 +92,28 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, nodes.dt <- tree.matrix[ , .(Quality = sum(Quality)) , by = .(abs.node.position, Feature) - ][, .(Text = paste0(Feature[1:min(length(Feature), features_keep)], - " (", - format(Quality[1:min(length(Quality), features_keep)], digits = 5), - ")") %>% - paste0(collapse = "\n")) - , by = abs.node.position] + ][, .(Text = paste0( + paste0( + Feature[1:min(length(Feature), features_keep)], + " (", + format(Quality[1:min(length(Quality), features_keep)], digits = 5), + ")" + ), + collapse = "\n" + ) + ) + , by = abs.node.position + ] - edges.dt <- tree.matrix[Feature != "Leaf", .(abs.node.position, Yes)] %>% - list(tree.matrix[Feature != "Leaf", .(abs.node.position, No)]) %>% - rbindlist() %>% - setnames(c("From", "To")) %>% - .[, .N, .(From, To)] %>% - .[, N := NULL] + edges.dt <- data.table::rbindlist( + l = list( + tree.matrix[Feature != "Leaf", .(abs.node.position, Yes)], + tree.matrix[Feature != "Leaf", .(abs.node.position, No)] + ) + ) + data.table::setnames(edges.dt, c("From", "To")) + edges.dt <- edges.dt[, .N, .(From, To)] + edges.dt[, N := NULL] nodes <- DiagrammeR::create_node_df( n = nrow(nodes.dt), @@ -120,21 +129,25 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, nodes_df = nodes, edges_df = edges, attr_theme = NULL - ) %>% - DiagrammeR::add_global_graph_attrs( + ) + graph <- DiagrammeR::add_global_graph_attrs( + graph = graph, attr_type = "graph", attr = c("layout", "rankdir"), value = c("dot", "LR") - ) %>% - DiagrammeR::add_global_graph_attrs( + ) + graph <- DiagrammeR::add_global_graph_attrs( + graph = graph, attr_type = "node", attr = c("color", "fillcolor", "style", "shape", "fontname"), value = c("DimGray", "beige", "filled", "rectangle", "Helvetica") - ) %>% - DiagrammeR::add_global_graph_attrs( + ) + graph <- DiagrammeR::add_global_graph_attrs( + graph = graph, attr_type = "edge", attr = c("color", "arrowsize", "arrowhead", "fontname"), - value = c("DimGray", "1.5", "vee", "Helvetica")) + value = c("DimGray", "1.5", "vee", "Helvetica") + ) if (!render) return(invisible(graph)) diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R index 6f0efd509..71b9f08a5 100644 --- a/R-package/R/xgb.plot.tree.R +++ b/R-package/R/xgb.plot.tree.R @@ -99,33 +99,41 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot fontcolor = "black") edges <- DiagrammeR::create_edge_df( - from = match(dt[Feature != "Leaf", c(ID)] %>% rep(2), dt$ID), + from = match(rep(dt[Feature != "Leaf", c(ID)], 2), dt$ID), to = match(dt[Feature != "Leaf", c(Yes, No)], dt$ID), - label = dt[Feature != "Leaf", paste("<", Split)] %>% - c(rep("", nrow(dt[Feature != "Leaf"]))), - style = dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")] %>% - c(dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]), + label = c( + dt[Feature != "Leaf", paste("<", Split)], + rep("", nrow(dt[Feature != "Leaf"])) + ), + style = c( + dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")], + dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")] + ), rel = "leading_to") graph <- DiagrammeR::create_graph( nodes_df = nodes, edges_df = edges, attr_theme = NULL - ) %>% - DiagrammeR::add_global_graph_attrs( + ) + graph <- DiagrammeR::add_global_graph_attrs( + graph = graph, attr_type = "graph", attr = c("layout", "rankdir"), value = c("dot", "LR") - ) %>% - DiagrammeR::add_global_graph_attrs( + ) + graph <- DiagrammeR::add_global_graph_attrs( + graph = graph, attr_type = "node", attr = c("color", "style", "fontname"), value = c("DimGray", "filled", "Helvetica") - ) %>% - DiagrammeR::add_global_graph_attrs( + ) + graph <- DiagrammeR::add_global_graph_attrs( + graph = graph, attr_type = "edge", attr = c("color", "arrowsize", "arrowhead", "fontname"), - value = c("DimGray", "1.5", "vee", "Helvetica")) + value = c("DimGray", "1.5", "vee", "Helvetica") + ) if (!render) return(invisible(graph)) diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index 74c1e8480..460f3c963 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -90,7 +90,6 @@ NULL #' @importFrom data.table setkey #' @importFrom data.table setkeyv #' @importFrom data.table setnames -#' @importFrom magrittr %>% #' @importFrom jsonlite fromJSON #' @importFrom jsonlite toJSON #' @importFrom utils object.size str tail diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 19709cb38..d52b70619 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -110,7 +110,7 @@ test_that("predict feature contributions works", { pred <- predict(bst.GLM, sparse_matrix, outputmargin = TRUE) expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5) # manual calculation of linear terms - coefs <- xgb.dump(bst.GLM)[-c(1, 2, 4)] %>% as.numeric + coefs <- as.numeric(xgb.dump(bst.GLM)[-c(1, 2, 4)]) coefs <- c(coefs[-1], coefs[1]) # intercept must be the last pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN = "*") expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual), @@ -130,7 +130,11 @@ test_that("predict feature contributions works", { pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE) pred_contr <- predict(mbst.GLM, as.matrix(iris[, -5]), predcontrib = TRUE) expect_length(pred_contr, 3) - coefs_all <- xgb.dump(mbst.GLM)[-c(1, 2, 6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE) + coefs_all <- matrix( + data = as.numeric(xgb.dump(mbst.GLM)[-c(1, 2, 6)]), + ncol = 3, + byrow = TRUE + ) for (g in seq_along(pred_contr)) { expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS")) expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance) diff --git a/R-package/tests/testthat/test_interactions.R b/R-package/tests/testthat/test_interactions.R index be7698ce9..7b86537c0 100644 --- a/R-package/tests/testthat/test_interactions.R +++ b/R-package/tests/testthat/test_interactions.R @@ -1,7 +1,6 @@ context('Test prediction of feature interactions') require(xgboost) -require(magrittr) set.seed(123) @@ -32,7 +31,7 @@ test_that("predict feature interactions works", { cont <- predict(b, dm, predcontrib = TRUE) expect_equal(dim(cont), c(N, P + 1)) # make sure for each row they add up to marginal predictions - max(abs(rowSums(cont) - pred)) %>% expect_lt(0.001) + expect_lt(max(abs(rowSums(cont) - pred)), 0.001) # Hand-construct the 'ground truth' feature contributions: gt_cont <- cbind( 2. * X[, 1], @@ -52,21 +51,24 @@ test_that("predict feature interactions works", { expect_equal(dimnames(intr), list(NULL, cn, cn)) # check the symmetry - max(abs(aperm(intr, c(1, 3, 2)) - intr)) %>% expect_lt(0.00001) + expect_lt(max(abs(aperm(intr, c(1, 3, 2)) - intr)), 0.00001) # sums WRT columns must be close to feature contributions - max(abs(apply(intr, c(1, 2), sum) - cont)) %>% expect_lt(0.00001) + expect_lt(max(abs(apply(intr, c(1, 2), sum) - cont)), 0.00001) # diagonal terms for features 3,4,5 must be close to zero - Reduce(max, sapply(3:P, function(i) max(abs(intr[, i, i])))) %>% expect_lt(0.05) + expect_lt(Reduce(max, sapply(3:P, function(i) max(abs(intr[, i, i])))), 0.05) # BIAS must have no interactions - max(abs(intr[, 1:P, P + 1])) %>% expect_lt(0.00001) + expect_lt(max(abs(intr[, 1:P, P + 1])), 0.00001) # interactions other than 2 x 3 must be close to zero intr23 <- intr intr23[, 2, 3] <- 0 - Reduce(max, sapply(1:P, function(i) max(abs(intr23[, i, (i + 1):(P + 1)])))) %>% expect_lt(0.05) + expect_lt( + Reduce(max, sapply(1:P, function(i) max(abs(intr23[, i, (i + 1):(P + 1)])))), + 0.05 + ) # Construct the 'ground truth' contributions of interactions directly from the linear terms: gt_intr <- array(0, c(N, P + 1, P + 1)) @@ -119,23 +121,39 @@ test_that("multiclass feature interactions work", { dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1) param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3) b <- xgb.train(param, dm, 40) - pred <- predict(b, dm, outputmargin = TRUE) %>% array(c(3, 150)) %>% t + pred <- t( + array( + data = predict(b, dm, outputmargin = TRUE), + dim = c(3, 150) + ) + ) # SHAP contributions: cont <- predict(b, dm, predcontrib = TRUE) expect_length(cont, 3) # rewrap them as a 3d array - cont <- unlist(cont) %>% array(c(150, 5, 3)) + cont <- array( + data = unlist(cont), + dim = c(150, 5, 3) + ) + # make sure for each row they add up to marginal predictions - max(abs(apply(cont, c(1, 3), sum) - pred)) %>% expect_lt(0.001) + expect_lt(max(abs(apply(cont, c(1, 3), sum) - pred)), 0.001) # SHAP interaction contributions: intr <- predict(b, dm, predinteraction = TRUE) expect_length(intr, 3) # rewrap them as a 4d array - intr <- unlist(intr) %>% array(c(150, 5, 5, 3)) %>% aperm(c(4, 1, 2, 3)) # [grp, row, col, col] + intr <- aperm( + a = array( + data = unlist(intr), + dim = c(150, 5, 5, 3) + ), + perm = c(4, 1, 2, 3) # [grp, row, col, col] + ) + # check the symmetry - max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)) %>% expect_lt(0.00001) + expect_lt(max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)), 0.00001) # sums WRT columns must be close to feature contributions - max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))) %>% expect_lt(0.00001) + expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))), 0.00001) }) diff --git a/cmake/RPackageInstall.cmake.in b/cmake/RPackageInstall.cmake.in index 20015f4c8..bde4c75c7 100644 --- a/cmake/RPackageInstall.cmake.in +++ b/cmake/RPackageInstall.cmake.in @@ -27,7 +27,7 @@ file(WRITE "${build_dir}/R-package/src/Makevars.win" "all:") # Install dependencies set(XGB_DEPS_SCRIPT - "deps = setdiff(c('data.table', 'jsonlite', 'magrittr', 'Matrix'), rownames(installed.packages())); if(length(deps)>0) install.packages(deps, repo = 'https://cloud.r-project.org/')") + "deps = setdiff(c('data.table', 'jsonlite', 'Matrix'), rownames(installed.packages())); if(length(deps)>0) install.packages(deps, repo = 'https://cloud.r-project.org/')") check_call(COMMAND "${LIBR_EXECUTABLE}" -q -e "${XGB_DEPS_SCRIPT}") # Install the XGBoost R package diff --git a/doc/install.rst b/doc/install.rst index 748b46532..31635d3a2 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -68,7 +68,7 @@ R .. code-block:: bash # Install dependencies - R -q -e "install.packages(c('data.table', 'magrittr', 'jsonlite'))" + R -q -e "install.packages(c('data.table', 'jsonlite'))" # Install XGBoost R CMD INSTALL ./xgboost_r_gpu_linux.tar.gz @@ -149,7 +149,7 @@ ID you want to install: ``xgboost_r_gpu_linux_[commit].tar.gz``, download it the .. code-block:: bash # Install dependencies - R -q -e "install.packages(c('data.table', 'magrittr', 'jsonlite', 'remotes'))" + R -q -e "install.packages(c('data.table', 'jsonlite', 'remotes'))" # Install XGBoost R CMD INSTALL ./xgboost_r_gpu_linux.tar.gz