From 0d8248ddcd99a8a8631d52c051e94a019bf4663c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 30 Jan 2023 04:47:21 -0600 Subject: [PATCH] [R] discourage use of regex for fixed string comparisons (#8736) --- R-package/R/callbacks.R | 8 ++++---- R-package/R/utils.R | 4 ++-- R-package/R/xgb.Booster.R | 2 +- R-package/inst/make-r-def.R | 4 ++-- R-package/tests/testthat/test_helpers.R | 16 ++++++++-------- demo/kaggle-otto/otto_train_pred.R | 2 +- demo/kaggle-otto/understandingXGBoostModel.Rmd | 2 +- tests/ci_build/lint_r.R | 1 + 8 files changed, 20 insertions(+), 19 deletions(-) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 3c4d2e13a..20fbd0617 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -114,7 +114,7 @@ cb.evaluation.log <- function() { if (is.null(mnames) || any(mnames == "")) stop("bst_evaluation must have non-empty names") - mnames <<- gsub('-', '_', names(env$bst_evaluation)) + mnames <<- gsub('-', '_', names(env$bst_evaluation), fixed = TRUE) if (!is.null(env$bst_evaluation_err)) mnames <<- c(paste0(mnames, '_mean'), paste0(mnames, '_std')) } @@ -185,7 +185,7 @@ cb.reset.parameters <- function(new_params) { if (typeof(new_params) != "list") stop("'new_params' must be a list") - pnames <- gsub("\\.", "_", names(new_params)) + pnames <- gsub(".", "_", names(new_params), fixed = TRUE) nrounds <- NULL # run some checks in the beginning @@ -300,9 +300,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE, if (length(env$bst_evaluation) == 0) stop("For early stopping, watchlist must have at least one element") - eval_names <- gsub('-', '_', names(env$bst_evaluation)) + eval_names <- gsub('-', '_', names(env$bst_evaluation), fixed = TRUE) if (!is.null(metric_name)) { - metric_idx <<- which(gsub('-', '_', metric_name) == eval_names) + metric_idx <<- which(gsub('-', '_', metric_name, fixed = TRUE) == eval_names) if (length(metric_idx) == 0) stop("'metric_name' for early stopping is not one of the following:\n", paste(eval_names, collapse = ' '), '\n') diff --git a/R-package/R/utils.R b/R-package/R/utils.R index e1f1df299..a822113a7 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -38,11 +38,11 @@ check.booster.params <- function(params, ...) { stop("params must be a list") # in R interface, allow for '.' instead of '_' in parameter names - names(params) <- gsub("\\.", "_", names(params)) + names(params) <- gsub(".", "_", names(params), fixed = TRUE) # merge parameters from the params and the dots-expansion dot_params <- list(...) - names(dot_params) <- gsub("\\.", "_", names(dot_params)) + names(dot_params) <- gsub(".", "_", names(dot_params), fixed = TRUE) if (length(intersect(names(params), names(dot_params))) > 0) stop("Same parameters in 'params' and in the call are not allowed. Please check your 'params' list.") diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 51bf291ce..6fa07c27f 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -672,7 +672,7 @@ xgb.config <- function(object) { if (is.null(names(p)) || any(nchar(names(p)) == 0)) { stop("parameter names cannot be empty strings") } - names(p) <- gsub("\\.", "_", names(p)) + names(p) <- gsub(".", "_", names(p), fixed = TRUE) p <- lapply(p, function(x) as.character(x)[1]) handle <- xgb.get.handle(object) for (i in seq_along(p)) { diff --git a/R-package/inst/make-r-def.R b/R-package/inst/make-r-def.R index ef5c06cac..f40e54c56 100644 --- a/R-package/inst/make-r-def.R +++ b/R-package/inst/make-r-def.R @@ -79,9 +79,9 @@ end_of_table <- empty_lines[empty_lines > start_index][1L] # Read the contents of the table exported_symbols <- objdump_results[(start_index + 1L):end_of_table] -exported_symbols <- gsub("\t", "", exported_symbols) +exported_symbols <- gsub("\t", "", exported_symbols, fixed = TRUE) exported_symbols <- gsub(".*\\] ", "", exported_symbols) -exported_symbols <- gsub(" ", "", exported_symbols) +exported_symbols <- gsub(" ", "", exported_symbols, fixed = TRUE) # Write R.def file writeLines( diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 7315a2f87..b48c8c707 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -63,7 +63,7 @@ test_that("xgb.dump works", { dmp <- xgb.dump(bst.Tree, dump_format = "json") expect_length(dmp, 1) if (!flag_32bit) - expect_length(grep('nodeid', strsplit(dmp, '\n')[[1]]), 188) + expect_length(grep('nodeid', strsplit(dmp, '\n', fixed = TRUE)[[1]], fixed = TRUE), 188) }) test_that("xgb.dump works for gblinear", { @@ -80,7 +80,7 @@ test_that("xgb.dump works for gblinear", { # JSON format dmp <- xgb.dump(bst.GLM.sp, dump_format = "json") expect_length(dmp, 1) - expect_length(grep('\\d', strsplit(dmp, '\n')[[1]]), 11) + expect_length(grep('\\d', strsplit(dmp, '\n', fixed = TRUE)[[1]]), 11) }) test_that("predict leafs works", { @@ -231,9 +231,9 @@ test_that("xgb-attribute functionality", { expect_null(xgb.attributes(bst)) }) -if (grepl('Windows', Sys.info()[['sysname']]) || - grepl('Linux', Sys.info()[['sysname']]) || - grepl('Darwin', Sys.info()[['sysname']])) { +if (grepl('Windows', Sys.info()[['sysname']], fixed = TRUE) || + grepl('Linux', Sys.info()[['sysname']], fixed = TRUE) || + grepl('Darwin', Sys.info()[['sysname']], fixed = TRUE)) { test_that("xgb-attribute numeric precision", { .skip_if_vcd_not_available() # check that lossless conversion works with 17 digits @@ -293,9 +293,9 @@ test_that("xgb.model.dt.tree works with and without feature names", { # using integer node ID instead of character dt.tree.int <- xgb.model.dt.tree(model = bst.Tree, use_int_id = TRUE) - expect_equal(as.integer(data.table::tstrsplit(dt.tree$Yes, '-')[[2]]), dt.tree.int$Yes) - expect_equal(as.integer(data.table::tstrsplit(dt.tree$No, '-')[[2]]), dt.tree.int$No) - expect_equal(as.integer(data.table::tstrsplit(dt.tree$Missing, '-')[[2]]), dt.tree.int$Missing) + expect_equal(as.integer(data.table::tstrsplit(dt.tree$Yes, '-', fixed = TRUE)[[2]]), dt.tree.int$Yes) + expect_equal(as.integer(data.table::tstrsplit(dt.tree$No, '-', fixed = TRUE)[[2]]), dt.tree.int$No) + expect_equal(as.integer(data.table::tstrsplit(dt.tree$Missing, '-', fixed = TRUE)[[2]]), dt.tree.int$Missing) }) test_that("xgb.model.dt.tree throws error for gblinear", { diff --git a/demo/kaggle-otto/otto_train_pred.R b/demo/kaggle-otto/otto_train_pred.R index 9a5244754..0a0afbb12 100644 --- a/demo/kaggle-otto/otto_train_pred.R +++ b/demo/kaggle-otto/otto_train_pred.R @@ -7,7 +7,7 @@ train <- train[, -1] test <- test[, -1] y <- train[, ncol(train)] -y <- gsub('Class_', '', y) +y <- gsub('Class_', '', y, fixed = TRUE) y <- as.integer(y) - 1 # xgboost take features in [0,numOfClass) x <- rbind(train[, -ncol(train)], test) diff --git a/demo/kaggle-otto/understandingXGBoostModel.Rmd b/demo/kaggle-otto/understandingXGBoostModel.Rmd index d8f1ba663..688003f78 100644 --- a/demo/kaggle-otto/understandingXGBoostModel.Rmd +++ b/demo/kaggle-otto/understandingXGBoostModel.Rmd @@ -87,7 +87,7 @@ For that purpose, we will: ```{r classToIntegers} # Convert from classes to numbers y <- train[, nameLastCol, with = FALSE][[1]] %>% - gsub('Class_', '', .) %>% + gsub('Class_', '', ., fixed = TRUE) %>% as.integer %>% subtract(., 1) diff --git a/tests/ci_build/lint_r.R b/tests/ci_build/lint_r.R index 6c3e81773..9d50fe433 100644 --- a/tests/ci_build/lint_r.R +++ b/tests/ci_build/lint_r.R @@ -23,6 +23,7 @@ my_linters <- list( brace_linter = lintr::brace_linter(), commas_linter = lintr::commas_linter(), equals_na = lintr::equals_na_linter(), + fixed_regex = lintr::fixed_regex_linter(), infix_spaces_linter = lintr::infix_spaces_linter(), line_length_linter = lintr::line_length_linter(length = 150L), no_tab_linter = lintr::no_tab_linter(),