[R] Remove stringi dependency (#6109)

* [R] Fix empty empty tests and a test warnings

* [R] Remove stringi dependency (fix #5905)

* Fix R lint check

* [R] Fix automatic conversion to factor in R < 4.0.0 in xgb.model.dt.tree

* Add `R` Makefile variable

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
Vitalie Spinu 2020-09-12 22:18:08 +02:00 committed by GitHub
parent 07945290a2
commit 1453bee3e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 30 additions and 40 deletions

View File

@ -138,12 +138,14 @@ Rpack: clean_all
rm xgboost/remove_warning_suppression_pragma.sh rm xgboost/remove_warning_suppression_pragma.sh
rm -rfv xgboost/tests/helper_scripts/ rm -rfv xgboost/tests/helper_scripts/
R ?= R
Rbuild: Rpack Rbuild: Rpack
R CMD build --no-build-vignettes xgboost $(R) CMD build --no-build-vignettes xgboost
rm -rf xgboost rm -rf xgboost
Rcheck: Rbuild Rcheck: Rbuild
R CMD check --as-cran xgboost*.tar.gz $(R) CMD check --as-cran xgboost*.tar.gz
-include build/*.d -include build/*.d
-include build/*/*.d -include build/*/*.d

View File

@ -63,6 +63,5 @@ Imports:
methods, methods,
data.table (>= 1.9.6), data.table (>= 1.9.6),
magrittr (>= 1.5), magrittr (>= 1.5),
stringi (>= 0.5.2)
RoxygenNote: 7.1.1 RoxygenNote: 7.1.1
SystemRequirements: GNU make, C++14 SystemRequirements: GNU make, C++14

View File

@ -81,11 +81,6 @@ importFrom(graphics,title)
importFrom(magrittr,"%>%") importFrom(magrittr,"%>%")
importFrom(stats,median) importFrom(stats,median)
importFrom(stats,predict) importFrom(stats,predict)
importFrom(stringi,stri_detect_regex)
importFrom(stringi,stri_match_first_regex)
importFrom(stringi,stri_replace_all_regex)
importFrom(stringi,stri_replace_first_regex)
importFrom(stringi,stri_split_regex)
importFrom(utils,head) importFrom(utils,head)
importFrom(utils,object.size) importFrom(utils,object.size)
importFrom(utils,str) importFrom(utils,str)

View File

@ -167,9 +167,8 @@ xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {
evnames <- names(watchlist) evnames <- names(watchlist)
if (is.null(feval)) { if (is.null(feval)) {
msg <- .Call(XGBoosterEvalOneIter_R, booster_handle, as.integer(iter), watchlist, as.list(evnames)) msg <- .Call(XGBoosterEvalOneIter_R, booster_handle, as.integer(iter), watchlist, as.list(evnames))
msg <- stri_split_regex(msg, '(\\s+|:|\\s+)')[[1]][-1] mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
res <- as.numeric(msg[c(FALSE, TRUE)]) # even indices are the values res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
names(res) <- msg[c(TRUE, FALSE)] # odds are the names
} else { } else {
res <- sapply(seq_along(watchlist), function(j) { res <- sapply(seq_along(watchlist), function(j) {
w <- watchlist[[j]] w <- watchlist[[j]]

View File

@ -56,10 +56,10 @@ xgb.dump <- function(model, fname = NULL, fmap = "", with_stats=FALSE,
as.character(dump_format)) as.character(dump_format))
if (is.null(fname)) if (is.null(fname))
model_dump <- stri_replace_all_regex(model_dump, '\t', '') model_dump <- gsub('\t', '', model_dump, fixed = TRUE)
if (dump_format == "text") if (dump_format == "text")
model_dump <- unlist(stri_split_regex(model_dump, '\n')) model_dump <- unlist(strsplit(model_dump, '\n', fixed = TRUE))
model_dump <- grep('^\\s*$', model_dump, invert = TRUE, value = TRUE) model_dump <- grep('^\\s*$', model_dump, invert = TRUE, value = TRUE)

View File

@ -87,11 +87,11 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
} }
if (length(text) < 2 || if (length(text) < 2 ||
sum(stri_detect_regex(text, 'yes=(\\d+),no=(\\d+)')) < 1) { sum(grepl('yes=(\\d+),no=(\\d+)', text)) < 1) {
stop("Non-tree model detected! This function can only be used with tree models.") stop("Non-tree model detected! This function can only be used with tree models.")
} }
position <- which(!is.na(stri_match_first_regex(text, "booster"))) position <- which(grepl("booster", text, fixed = TRUE))
add.tree.id <- function(node, tree) if (use_int_id) node else paste(tree, node, sep = "-") add.tree.id <- function(node, tree) if (use_int_id) node else paste(tree, node, sep = "-")
@ -108,9 +108,9 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
} }
td <- td[Tree %in% trees & !grepl('^booster', t)] td <- td[Tree %in% trees & !grepl('^booster', t)]
td[, Node := stri_match_first_regex(t, "(\\d+):")[, 2] %>% as.integer] td[, Node := as.integer(sub("^([0-9]+):.*", "\\1", t))]
if (!use_int_id) td[, ID := add.tree.id(Node, Tree)] if (!use_int_id) td[, ID := add.tree.id(Node, Tree)]
td[, isLeaf := !is.na(stri_match_first_regex(t, "leaf"))] td[, isLeaf := grepl("leaf", t, fixed = TRUE)]
# parse branch lines # parse branch lines
branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),", branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
@ -118,10 +118,11 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover") branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
td[isLeaf == FALSE, td[isLeaf == FALSE,
(branch_cols) := { (branch_cols) := {
matches <- regmatches(t, regexec(branch_rx, t))
# skip some indices with spurious capture groups from anynumber_regex # skip some indices with spurious capture groups from anynumber_regex
xtr <- stri_match_first_regex(t, branch_rx)[, c(2, 3, 5, 6, 7, 8, 10), drop = FALSE] xtr <- do.call(rbind, matches)[, c(2, 3, 5, 6, 7, 8, 10), drop = FALSE]
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree) xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
lapply(seq_len(ncol(xtr)), function(i) xtr[, i]) as.data.table(xtr)
}] }]
# assign feature_names when available # assign feature_names when available
if (!is.null(feature_names)) { if (!is.null(feature_names)) {
@ -135,8 +136,9 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
leaf_cols <- c("Feature", "Quality", "Cover") leaf_cols <- c("Feature", "Quality", "Cover")
td[isLeaf == TRUE, td[isLeaf == TRUE,
(leaf_cols) := { (leaf_cols) := {
xtr <- stri_match_first_regex(t, leaf_rx)[, c(2, 4)] matches <- regmatches(t, regexec(leaf_rx, t))
c("Leaf", lapply(seq_len(ncol(xtr)), function(i) xtr[, i])) xtr <- do.call(rbind, matches)[, c(2, 4)]
c("Leaf", as.data.table(xtr))
}] }]
# convert some columns to numeric # convert some columns to numeric

View File

@ -67,7 +67,7 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
# first number of the path represents the tree, then the following numbers are related to the path to follow # first number of the path represents the tree, then the following numbers are related to the path to follow
# root init # root init
root.nodes <- tree.matrix[stri_detect_regex(ID, "\\d+-0"), ID] root.nodes <- tree.matrix[Node == 0, ID]
tree.matrix[ID %in% root.nodes, abs.node.position := root.nodes] tree.matrix[ID %in% root.nodes, abs.node.position := root.nodes]
precedent.nodes <- root.nodes precedent.nodes <- root.nodes
@ -86,11 +86,8 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
tree.matrix[!is.na(Yes), Yes := paste0(abs.node.position, "_0")] tree.matrix[!is.na(Yes), Yes := paste0(abs.node.position, "_0")]
tree.matrix[!is.na(No), No := paste0(abs.node.position, "_1")] tree.matrix[!is.na(No), No := paste0(abs.node.position, "_1")]
remove.tree <- . %>% stri_replace_first_regex(pattern = "^\\d+-", replacement = "") for (nm in c("abs.node.position", "Yes", "No"))
data.table::set(tree.matrix, j = nm, value = sub("^\\d+-", "", tree.matrix[[nm]]))
tree.matrix[, `:=`(abs.node.position = remove.tree(abs.node.position),
Yes = remove.tree(Yes),
No = remove.tree(No))]
nodes.dt <- tree.matrix[ nodes.dt <- tree.matrix[
, .(Quality = sum(Quality)) , .(Quality = sum(Quality))

View File

@ -91,11 +91,6 @@ NULL
#' @importFrom data.table setkeyv #' @importFrom data.table setkeyv
#' @importFrom data.table setnames #' @importFrom data.table setnames
#' @importFrom magrittr %>% #' @importFrom magrittr %>%
#' @importFrom stringi stri_detect_regex
#' @importFrom stringi stri_match_first_regex
#' @importFrom stringi stri_replace_first_regex
#' @importFrom stringi stri_replace_all_regex
#' @importFrom stringi stri_split_regex
#' @importFrom utils object.size str tail #' @importFrom utils object.size str tail
#' @importFrom stats predict #' @importFrom stats predict
#' @importFrom stats median #' @importFrom stats median

View File

@ -47,7 +47,7 @@ test_that("custom objective with early stop works", {
bst <- xgb.train(param, dtrain, 10, watchlist) bst <- xgb.train(param, dtrain, 10, watchlist)
expect_equal(class(bst), "xgb.Booster") expect_equal(class(bst), "xgb.Booster")
train_log <- bst$evaluation_log$train_error train_log <- bst$evaluation_log$train_error
expect_true(all(diff(train_log)) <= 0) expect_true(all(diff(train_log) <= 0))
}) })
test_that("custom objective using DMatrix attr works", { test_that("custom objective using DMatrix attr works", {

View File

@ -12,4 +12,5 @@ test_that("train and prediction when gctorture is on", {
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic") eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
pred <- predict(bst, test$data) pred <- predict(bst, test$data)
gctorture(FALSE) gctorture(FALSE)
expect_length(pred, length(test$label))
}) })

View File

@ -335,8 +335,8 @@ test_that("xgb.model.dt.tree and xgb.importance work with a single split model",
}) })
test_that("xgb.plot.tree works with and without feature names", { test_that("xgb.plot.tree works with and without feature names", {
xgb.plot.tree(feature_names = feature.names, model = bst.Tree) expect_silent(xgb.plot.tree(feature_names = feature.names, model = bst.Tree))
xgb.plot.tree(model = bst.Tree) expect_silent(xgb.plot.tree(model = bst.Tree))
}) })
test_that("xgb.plot.multi.trees works with and without feature names", { test_that("xgb.plot.multi.trees works with and without feature names", {
@ -390,8 +390,8 @@ test_that("xgb.plot.shap works", {
}) })
test_that("xgb.plot.shap.summary works", { test_that("xgb.plot.shap.summary works", {
xgb.plot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2) expect_silent(xgb.plot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2))
xgb.ggplot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2) expect_silent(xgb.ggplot.shap.summary(data = sparse_matrix, model = bst.Tree, top_n = 2))
}) })
test_that("check.deprecation works", { test_that("check.deprecation works", {