From 894e9bc5d448e506e28a903970a9c56cfd16b914 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Wed, 12 May 2021 15:34:59 -0500
Subject: [PATCH] [R-package] remove dependency on {magrittr} (#6928)

Co-authored-by: Hyunsu Cho <chohyu01@cs.washington.edu>
---
 .github/workflows/r_nold.yml                 |  2 +-
 .github/workflows/r_tests.yml                |  2 +-
 R-package/DESCRIPTION                        |  1 -
 R-package/NAMESPACE                          |  1 -
 R-package/R/callbacks.R                      |  9 +++-
 R-package/R/xgb.Booster.R                    | 29 +++++++++--
 R-package/R/xgb.importance.R                 |  7 +--
 R-package/R/xgb.plot.multi.trees.R           | 55 ++++++++++++--------
 R-package/R/xgb.plot.tree.R                  | 32 +++++++-----
 R-package/R/xgboost.R                        |  1 -
 R-package/tests/testthat/test_helpers.R      |  8 ++-
 R-package/tests/testthat/test_interactions.R | 44 +++++++++++-----
 cmake/RPackageInstall.cmake.in               |  2 +-
 doc/install.rst                              |  4 +-
 14 files changed, 131 insertions(+), 66 deletions(-)

diff --git a/.github/workflows/r_nold.yml b/.github/workflows/r_nold.yml
index 21d7fd875..554b93571 100644
--- a/.github/workflows/r_nold.yml
+++ b/.github/workflows/r_nold.yml
@@ -8,7 +8,7 @@ on:
     types: [created]
 
 env:
-  R_PACKAGES: c('XML', 'igraph', 'data.table', 'magrittr', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
+  R_PACKAGES: c('XML', 'igraph', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
 
 jobs:
   test-R-noLD:
diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml
index c3c50894f..58e0e629a 100644
--- a/.github/workflows/r_tests.yml
+++ b/.github/workflows/r_tests.yml
@@ -3,7 +3,7 @@ name: XGBoost-R-Tests
 on: [push, pull_request]
 
 env:
-  R_PACKAGES: c('XML', 'igraph', 'data.table', 'magrittr', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
+  R_PACKAGES: c('XML', 'igraph', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic')
 
 jobs:
   lintr:
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 1dc05165f..7f0418507 100644
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -62,7 +62,6 @@ Imports:
     Matrix (>= 1.1-0),
     methods,
     data.table (>= 1.9.6),
-    magrittr (>= 1.5),
     jsonlite (>= 1.0),
 RoxygenNote: 7.1.1
 SystemRequirements: GNU make, C++14
diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index f12f92b9a..bbb5ee225 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -82,7 +82,6 @@ importFrom(graphics,points)
 importFrom(graphics,title)
 importFrom(jsonlite,fromJSON)
 importFrom(jsonlite,toJSON)
-importFrom(magrittr,"%>%")
 importFrom(stats,median)
 importFrom(stats,predict)
 importFrom(utils,head)
diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R
index c3ea2acaf..2c1b2ecaa 100644
--- a/R-package/R/callbacks.R
+++ b/R-package/R/callbacks.R
@@ -642,8 +642,13 @@ cb.gblinear.history <- function(sparse=FALSE) {
       coefs <<- list2mat(coefs)
     } else { # xgb.cv:
       # first lapply transposes the list
-      coefs <<- lapply(seq_along(coefs[[1]]), function(i) lapply(coefs, "[[", i)) %>%
-                lapply(function(x) list2mat(x))
+      coefs <<- lapply(
+        X = lapply(
+          X = seq_along(coefs[[1]]),
+          FUN = function(i) lapply(coefs, "[[", i)
+        ),
+        FUN = function(x) list2mat(x)
+      )
     }
   }
 
diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R
index 487e6957f..af243e302 100644
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@@ -372,8 +372,14 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
     } else if (n_group == 1) {
       matrix(ret, nrow = n_row, byrow = TRUE, dimnames = list(NULL, cnames))
     } else {
-      arr <- array(ret, c(n_col1, n_group, n_row),
-                   dimnames = list(cnames, NULL, NULL)) %>% aperm(c(2, 3, 1)) # [group, row, col]
+      arr <- aperm(
+        a = array(
+          data = ret,
+          dim = c(n_col1, n_group, n_row),
+          dimnames = list(cnames, NULL, NULL)
+        ),
+        perm = c(2, 3, 1)  # [group, row, col]
+      )
       lapply(seq_len(n_group), function(g) arr[g, , ])
     }
   } else if (predinteraction) {
@@ -383,10 +389,23 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
     ret <- if (n_ret == n_row) {
       matrix(ret, ncol = 1, dimnames = list(NULL, cnames))
     } else if (n_group == 1) {
-      array(ret, c(n_col1, n_col1, n_row), dimnames = list(cnames, cnames, NULL)) %>% aperm(c(3, 1, 2))
+      aperm(
+        a = array(
+          data = ret,
+          dim = c(n_col1, n_col1, n_row),
+          dimnames = list(cnames, cnames, NULL)
+        ),
+        perm = c(3, 1, 2)
+      )
     } else {
-      arr <- array(ret, c(n_col1, n_col1, n_group, n_row),
-                   dimnames = list(cnames, cnames, NULL, NULL)) %>% aperm(c(3, 4, 1, 2)) # [group, row, col1, col2]
+      arr <- aperm(
+        a = array(
+          data = ret,
+          dim = c(n_col1, n_col1, n_group, n_row),
+          dimnames = list(cnames, cnames, NULL, NULL)
+        ),
+        perm = c(3, 4, 1, 2)  # [group, row, col1, col2]
+      )
       lapply(seq_len(n_group), function(g) arr[g, , , ])
     }
   } else if (reshape && npred_per_case > 1) {
diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R
index b1c59d98d..7305ee571 100644
--- a/R-package/R/xgb.importance.R
+++ b/R-package/R/xgb.importance.R
@@ -100,9 +100,10 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
 
   # linear model
   if (model_text_dump[2] == "bias:"){
-    weights <- which(model_text_dump == "weight:") %>%
-               {model_text_dump[(. + 1):length(model_text_dump)]} %>%
-               as.numeric
+    weight_index <- which(model_text_dump == "weight:") + 1
+    weights <- as.numeric(
+      model_text_dump[weight_index:length(model_text_dump)]
+    )
 
     num_class <- NVL(model$params$num_class, 1)
     if (is.null(feature_names))
diff --git a/R-package/R/xgb.plot.multi.trees.R b/R-package/R/xgb.plot.multi.trees.R
index c884281fc..063f1034a 100644
--- a/R-package/R/xgb.plot.multi.trees.R
+++ b/R-package/R/xgb.plot.multi.trees.R
@@ -75,8 +75,8 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
   while (tree.matrix[, sum(is.na(abs.node.position))] > 0) {
     yes.row.nodes <- tree.matrix[abs.node.position %in% precedent.nodes & !is.na(Yes)]
     no.row.nodes <- tree.matrix[abs.node.position %in% precedent.nodes & !is.na(No)]
-    yes.nodes.abs.pos <- yes.row.nodes[, abs.node.position] %>% paste0("_0")
-    no.nodes.abs.pos <- no.row.nodes[, abs.node.position] %>% paste0("_1")
+    yes.nodes.abs.pos <- paste0(yes.row.nodes[, abs.node.position], "_0")
+    no.nodes.abs.pos <- paste0(no.row.nodes[, abs.node.position], "_1")
 
     tree.matrix[ID %in% yes.row.nodes[, Yes], abs.node.position := yes.nodes.abs.pos]
     tree.matrix[ID %in% no.row.nodes[, No], abs.node.position := no.nodes.abs.pos]
@@ -92,19 +92,28 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
   nodes.dt <- tree.matrix[
         , .(Quality = sum(Quality))
         , by = .(abs.node.position, Feature)
-      ][, .(Text = paste0(Feature[1:min(length(Feature), features_keep)],
-                          " (",
-                          format(Quality[1:min(length(Quality), features_keep)], digits = 5),
-                          ")") %>%
-                   paste0(collapse = "\n"))
-        , by = abs.node.position]
+      ][, .(Text = paste0(
+              paste0(
+                Feature[1:min(length(Feature), features_keep)],
+                " (",
+                format(Quality[1:min(length(Quality), features_keep)], digits = 5),
+                ")"
+              ),
+              collapse = "\n"
+            )
+          )
+        , by = abs.node.position
+      ]
 
-  edges.dt <- tree.matrix[Feature != "Leaf", .(abs.node.position, Yes)] %>%
-    list(tree.matrix[Feature != "Leaf", .(abs.node.position, No)]) %>%
-    rbindlist() %>%
-    setnames(c("From", "To")) %>%
-    .[, .N, .(From, To)] %>%
-    .[, N := NULL]
+  edges.dt <- data.table::rbindlist(
+    l = list(
+      tree.matrix[Feature != "Leaf", .(abs.node.position, Yes)],
+      tree.matrix[Feature != "Leaf", .(abs.node.position, No)]
+    )
+  )
+  data.table::setnames(edges.dt, c("From", "To"))
+  edges.dt <- edges.dt[, .N, .(From, To)]
+  edges.dt[, N := NULL]
 
   nodes <- DiagrammeR::create_node_df(
     n = nrow(nodes.dt),
@@ -120,21 +129,25 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
       nodes_df = nodes,
       edges_df = edges,
       attr_theme = NULL
-      ) %>%
-    DiagrammeR::add_global_graph_attrs(
+  )
+  graph <- DiagrammeR::add_global_graph_attrs(
+      graph = graph,
       attr_type = "graph",
       attr  = c("layout", "rankdir"),
       value = c("dot", "LR")
-      ) %>%
-    DiagrammeR::add_global_graph_attrs(
+  )
+  graph <- DiagrammeR::add_global_graph_attrs(
+      graph = graph,
       attr_type = "node",
       attr  = c("color", "fillcolor", "style", "shape", "fontname"),
       value = c("DimGray", "beige", "filled", "rectangle", "Helvetica")
-      ) %>%
-    DiagrammeR::add_global_graph_attrs(
+  )
+  graph <- DiagrammeR::add_global_graph_attrs(
+      graph = graph,
       attr_type = "edge",
       attr  = c("color", "arrowsize", "arrowhead", "fontname"),
-      value = c("DimGray", "1.5", "vee", "Helvetica"))
+      value = c("DimGray", "1.5", "vee", "Helvetica")
+  )
 
   if (!render) return(invisible(graph))
 
diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R
index 6f0efd509..71b9f08a5 100644
--- a/R-package/R/xgb.plot.tree.R
+++ b/R-package/R/xgb.plot.tree.R
@@ -99,33 +99,41 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
     fontcolor = "black")
 
   edges <- DiagrammeR::create_edge_df(
-    from  = match(dt[Feature != "Leaf", c(ID)] %>% rep(2), dt$ID),
+    from  = match(rep(dt[Feature != "Leaf", c(ID)], 2), dt$ID),
     to    = match(dt[Feature != "Leaf", c(Yes, No)], dt$ID),
-    label = dt[Feature != "Leaf", paste("<", Split)] %>%
-            c(rep("", nrow(dt[Feature != "Leaf"]))),
-    style = dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")] %>%
-            c(dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]),
+    label = c(
+      dt[Feature != "Leaf", paste("<", Split)],
+      rep("", nrow(dt[Feature != "Leaf"]))
+    ),
+    style = c(
+      dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")],
+      dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]
+    ),
     rel   = "leading_to")
 
   graph <- DiagrammeR::create_graph(
       nodes_df = nodes,
       edges_df = edges,
       attr_theme = NULL
-      ) %>%
-    DiagrammeR::add_global_graph_attrs(
+  )
+  graph <- DiagrammeR::add_global_graph_attrs(
+      graph = graph,
       attr_type = "graph",
       attr  = c("layout", "rankdir"),
       value = c("dot", "LR")
-      ) %>%
-    DiagrammeR::add_global_graph_attrs(
+  )
+  graph <- DiagrammeR::add_global_graph_attrs(
+      graph = graph,
       attr_type = "node",
       attr  = c("color", "style", "fontname"),
       value = c("DimGray", "filled", "Helvetica")
-      ) %>%
-    DiagrammeR::add_global_graph_attrs(
+  )
+  graph <- DiagrammeR::add_global_graph_attrs(
+      graph = graph,
       attr_type = "edge",
       attr  = c("color", "arrowsize", "arrowhead", "fontname"),
-      value = c("DimGray", "1.5", "vee", "Helvetica"))
+      value = c("DimGray", "1.5", "vee", "Helvetica")
+  )
 
   if (!render) return(invisible(graph))
 
diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
index 74c1e8480..460f3c963 100644
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -90,7 +90,6 @@ NULL
 #' @importFrom data.table setkey
 #' @importFrom data.table setkeyv
 #' @importFrom data.table setnames
-#' @importFrom magrittr %>%
 #' @importFrom jsonlite fromJSON
 #' @importFrom jsonlite toJSON
 #' @importFrom utils object.size str tail
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index 19709cb38..d52b70619 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -110,7 +110,7 @@ test_that("predict feature contributions works", {
   pred <- predict(bst.GLM, sparse_matrix, outputmargin = TRUE)
   expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
   # manual calculation of linear terms
-  coefs <- xgb.dump(bst.GLM)[-c(1, 2, 4)] %>% as.numeric
+  coefs <- as.numeric(xgb.dump(bst.GLM)[-c(1, 2, 4)])
   coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
   pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN = "*")
   expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual),
@@ -130,7 +130,11 @@ test_that("predict feature contributions works", {
   pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
   pred_contr <- predict(mbst.GLM, as.matrix(iris[, -5]), predcontrib = TRUE)
   expect_length(pred_contr, 3)
-  coefs_all <- xgb.dump(mbst.GLM)[-c(1, 2, 6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
+  coefs_all <- matrix(
+    data = as.numeric(xgb.dump(mbst.GLM)[-c(1, 2, 6)]),
+    ncol = 3,
+    byrow = TRUE
+  )
   for (g in seq_along(pred_contr)) {
     expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
     expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
diff --git a/R-package/tests/testthat/test_interactions.R b/R-package/tests/testthat/test_interactions.R
index be7698ce9..7b86537c0 100644
--- a/R-package/tests/testthat/test_interactions.R
+++ b/R-package/tests/testthat/test_interactions.R
@@ -1,7 +1,6 @@
 context('Test prediction of feature interactions')
 
 require(xgboost)
-require(magrittr)
 
 set.seed(123)
 
@@ -32,7 +31,7 @@ test_that("predict feature interactions works", {
   cont <- predict(b, dm, predcontrib = TRUE)
   expect_equal(dim(cont), c(N, P + 1))
   # make sure for each row they add up to marginal predictions
-  max(abs(rowSums(cont) - pred)) %>% expect_lt(0.001)
+  expect_lt(max(abs(rowSums(cont) - pred)), 0.001)
   # Hand-construct the 'ground truth' feature contributions:
   gt_cont <- cbind(
       2. * X[, 1],
@@ -52,21 +51,24 @@ test_that("predict feature interactions works", {
   expect_equal(dimnames(intr), list(NULL, cn, cn))
 
   # check the symmetry
-  max(abs(aperm(intr, c(1, 3, 2)) - intr)) %>% expect_lt(0.00001)
+  expect_lt(max(abs(aperm(intr, c(1, 3, 2)) - intr)), 0.00001)
 
   # sums WRT columns must be close to feature contributions
-  max(abs(apply(intr, c(1, 2), sum) - cont)) %>% expect_lt(0.00001)
+  expect_lt(max(abs(apply(intr, c(1, 2), sum) - cont)), 0.00001)
 
   # diagonal terms for features 3,4,5 must be close to zero
-  Reduce(max, sapply(3:P, function(i) max(abs(intr[, i, i])))) %>% expect_lt(0.05)
+  expect_lt(Reduce(max, sapply(3:P, function(i) max(abs(intr[, i, i])))), 0.05)
 
   # BIAS must have no interactions
-  max(abs(intr[, 1:P, P + 1])) %>% expect_lt(0.00001)
+  expect_lt(max(abs(intr[, 1:P, P + 1])), 0.00001)
 
   # interactions other than 2 x 3 must be close to zero
   intr23 <- intr
   intr23[, 2, 3] <- 0
-  Reduce(max, sapply(1:P, function(i) max(abs(intr23[, i, (i + 1):(P + 1)])))) %>% expect_lt(0.05)
+  expect_lt(
+    Reduce(max, sapply(1:P, function(i) max(abs(intr23[, i, (i + 1):(P + 1)])))),
+    0.05
+  )
 
   # Construct the 'ground truth' contributions of interactions directly from the linear terms:
   gt_intr <- array(0, c(N, P + 1, P + 1))
@@ -119,23 +121,39 @@ test_that("multiclass feature interactions work", {
   dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
   param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
   b <- xgb.train(param, dm, 40)
-  pred <- predict(b, dm, outputmargin = TRUE) %>% array(c(3, 150)) %>% t
+  pred <- t(
+    array(
+      data = predict(b, dm, outputmargin = TRUE),
+      dim = c(3, 150)
+    )
+  )
 
   # SHAP contributions:
   cont <- predict(b, dm, predcontrib = TRUE)
   expect_length(cont, 3)
   # rewrap them as a 3d array
-  cont <- unlist(cont) %>% array(c(150, 5, 3))
+  cont <- array(
+    data = unlist(cont),
+    dim = c(150, 5,  3)
+  )
+
   # make sure for each row they add up to marginal predictions
-  max(abs(apply(cont, c(1, 3), sum) - pred)) %>% expect_lt(0.001)
+  expect_lt(max(abs(apply(cont, c(1, 3), sum) - pred)), 0.001)
 
   # SHAP interaction contributions:
   intr <- predict(b, dm, predinteraction = TRUE)
   expect_length(intr, 3)
   # rewrap them as a 4d array
-  intr <- unlist(intr) %>% array(c(150, 5, 5, 3)) %>% aperm(c(4, 1, 2, 3)) # [grp, row, col, col]
+  intr <- aperm(
+    a = array(
+      data = unlist(intr),
+      dim = c(150, 5, 5, 3)
+    ),
+    perm = c(4, 1, 2, 3)  # [grp, row, col, col]
+  )
+
   # check the symmetry
-  max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)) %>% expect_lt(0.00001)
+  expect_lt(max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)), 0.00001)
   # sums WRT columns must be close to feature contributions
-  max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))) %>% expect_lt(0.00001)
+  expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))), 0.00001)
 })
diff --git a/cmake/RPackageInstall.cmake.in b/cmake/RPackageInstall.cmake.in
index 20015f4c8..bde4c75c7 100644
--- a/cmake/RPackageInstall.cmake.in
+++ b/cmake/RPackageInstall.cmake.in
@@ -27,7 +27,7 @@ file(WRITE "${build_dir}/R-package/src/Makevars.win" "all:")
 
 # Install dependencies
 set(XGB_DEPS_SCRIPT
-    "deps = setdiff(c('data.table', 'jsonlite', 'magrittr', 'Matrix'), rownames(installed.packages())); if(length(deps)>0) install.packages(deps, repo = 'https://cloud.r-project.org/')")
+    "deps = setdiff(c('data.table', 'jsonlite', 'Matrix'), rownames(installed.packages())); if(length(deps)>0) install.packages(deps, repo = 'https://cloud.r-project.org/')")
 check_call(COMMAND "${LIBR_EXECUTABLE}" -q -e "${XGB_DEPS_SCRIPT}")
 
 # Install the XGBoost R package
diff --git a/doc/install.rst b/doc/install.rst
index 748b46532..31635d3a2 100644
--- a/doc/install.rst
+++ b/doc/install.rst
@@ -68,7 +68,7 @@ R
   .. code-block:: bash
 
     # Install dependencies
-    R -q -e "install.packages(c('data.table', 'magrittr', 'jsonlite'))"
+    R -q -e "install.packages(c('data.table', 'jsonlite'))"
     # Install XGBoost
     R CMD INSTALL ./xgboost_r_gpu_linux.tar.gz
 
@@ -149,7 +149,7 @@ ID you want to install: ``xgboost_r_gpu_linux_[commit].tar.gz``, download it the
 .. code-block:: bash
 
   # Install dependencies
-  R -q -e "install.packages(c('data.table', 'magrittr', 'jsonlite', 'remotes'))"
+  R -q -e "install.packages(c('data.table', 'jsonlite', 'remotes'))"
   # Install XGBoost
   R CMD INSTALL ./xgboost_r_gpu_linux.tar.gz