[R] discourage use of regex for fixed string comparisons (#8736)

This commit is contained in:
James Lamb 2023-01-30 04:47:21 -06:00 committed by GitHub
parent 1325ba9251
commit 0d8248ddcd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 20 additions and 19 deletions

View File

@ -114,7 +114,7 @@ cb.evaluation.log <- function() {
if (is.null(mnames) || any(mnames == "")) if (is.null(mnames) || any(mnames == ""))
stop("bst_evaluation must have non-empty names") stop("bst_evaluation must have non-empty names")
mnames <<- gsub('-', '_', names(env$bst_evaluation)) mnames <<- gsub('-', '_', names(env$bst_evaluation), fixed = TRUE)
if (!is.null(env$bst_evaluation_err)) if (!is.null(env$bst_evaluation_err))
mnames <<- c(paste0(mnames, '_mean'), paste0(mnames, '_std')) mnames <<- c(paste0(mnames, '_mean'), paste0(mnames, '_std'))
} }
@ -185,7 +185,7 @@ cb.reset.parameters <- function(new_params) {
if (typeof(new_params) != "list") if (typeof(new_params) != "list")
stop("'new_params' must be a list") stop("'new_params' must be a list")
pnames <- gsub("\\.", "_", names(new_params)) pnames <- gsub(".", "_", names(new_params), fixed = TRUE)
nrounds <- NULL nrounds <- NULL
# run some checks in the beginning # run some checks in the beginning
@ -300,9 +300,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
if (length(env$bst_evaluation) == 0) if (length(env$bst_evaluation) == 0)
stop("For early stopping, watchlist must have at least one element") stop("For early stopping, watchlist must have at least one element")
eval_names <- gsub('-', '_', names(env$bst_evaluation)) eval_names <- gsub('-', '_', names(env$bst_evaluation), fixed = TRUE)
if (!is.null(metric_name)) { if (!is.null(metric_name)) {
metric_idx <<- which(gsub('-', '_', metric_name) == eval_names) metric_idx <<- which(gsub('-', '_', metric_name, fixed = TRUE) == eval_names)
if (length(metric_idx) == 0) if (length(metric_idx) == 0)
stop("'metric_name' for early stopping is not one of the following:\n", stop("'metric_name' for early stopping is not one of the following:\n",
paste(eval_names, collapse = ' '), '\n') paste(eval_names, collapse = ' '), '\n')

View File

@ -38,11 +38,11 @@ check.booster.params <- function(params, ...) {
stop("params must be a list") stop("params must be a list")
# in R interface, allow for '.' instead of '_' in parameter names # in R interface, allow for '.' instead of '_' in parameter names
names(params) <- gsub("\\.", "_", names(params)) names(params) <- gsub(".", "_", names(params), fixed = TRUE)
# merge parameters from the params and the dots-expansion # merge parameters from the params and the dots-expansion
dot_params <- list(...) dot_params <- list(...)
names(dot_params) <- gsub("\\.", "_", names(dot_params)) names(dot_params) <- gsub(".", "_", names(dot_params), fixed = TRUE)
if (length(intersect(names(params), if (length(intersect(names(params),
names(dot_params))) > 0) names(dot_params))) > 0)
stop("Same parameters in 'params' and in the call are not allowed. Please check your 'params' list.") stop("Same parameters in 'params' and in the call are not allowed. Please check your 'params' list.")

View File

@ -672,7 +672,7 @@ xgb.config <- function(object) {
if (is.null(names(p)) || any(nchar(names(p)) == 0)) { if (is.null(names(p)) || any(nchar(names(p)) == 0)) {
stop("parameter names cannot be empty strings") stop("parameter names cannot be empty strings")
} }
names(p) <- gsub("\\.", "_", names(p)) names(p) <- gsub(".", "_", names(p), fixed = TRUE)
p <- lapply(p, function(x) as.character(x)[1]) p <- lapply(p, function(x) as.character(x)[1])
handle <- xgb.get.handle(object) handle <- xgb.get.handle(object)
for (i in seq_along(p)) { for (i in seq_along(p)) {

View File

@ -79,9 +79,9 @@ end_of_table <- empty_lines[empty_lines > start_index][1L]
# Read the contents of the table # Read the contents of the table
exported_symbols <- objdump_results[(start_index + 1L):end_of_table] exported_symbols <- objdump_results[(start_index + 1L):end_of_table]
exported_symbols <- gsub("\t", "", exported_symbols) exported_symbols <- gsub("\t", "", exported_symbols, fixed = TRUE)
exported_symbols <- gsub(".*\\] ", "", exported_symbols) exported_symbols <- gsub(".*\\] ", "", exported_symbols)
exported_symbols <- gsub(" ", "", exported_symbols) exported_symbols <- gsub(" ", "", exported_symbols, fixed = TRUE)
# Write R.def file # Write R.def file
writeLines( writeLines(

View File

@ -63,7 +63,7 @@ test_that("xgb.dump works", {
dmp <- xgb.dump(bst.Tree, dump_format = "json") dmp <- xgb.dump(bst.Tree, dump_format = "json")
expect_length(dmp, 1) expect_length(dmp, 1)
if (!flag_32bit) if (!flag_32bit)
expect_length(grep('nodeid', strsplit(dmp, '\n')[[1]]), 188) expect_length(grep('nodeid', strsplit(dmp, '\n', fixed = TRUE)[[1]], fixed = TRUE), 188)
}) })
test_that("xgb.dump works for gblinear", { test_that("xgb.dump works for gblinear", {
@ -80,7 +80,7 @@ test_that("xgb.dump works for gblinear", {
# JSON format # JSON format
dmp <- xgb.dump(bst.GLM.sp, dump_format = "json") dmp <- xgb.dump(bst.GLM.sp, dump_format = "json")
expect_length(dmp, 1) expect_length(dmp, 1)
expect_length(grep('\\d', strsplit(dmp, '\n')[[1]]), 11) expect_length(grep('\\d', strsplit(dmp, '\n', fixed = TRUE)[[1]]), 11)
}) })
test_that("predict leafs works", { test_that("predict leafs works", {
@ -231,9 +231,9 @@ test_that("xgb-attribute functionality", {
expect_null(xgb.attributes(bst)) expect_null(xgb.attributes(bst))
}) })
if (grepl('Windows', Sys.info()[['sysname']]) || if (grepl('Windows', Sys.info()[['sysname']], fixed = TRUE) ||
grepl('Linux', Sys.info()[['sysname']]) || grepl('Linux', Sys.info()[['sysname']], fixed = TRUE) ||
grepl('Darwin', Sys.info()[['sysname']])) { grepl('Darwin', Sys.info()[['sysname']], fixed = TRUE)) {
test_that("xgb-attribute numeric precision", { test_that("xgb-attribute numeric precision", {
.skip_if_vcd_not_available() .skip_if_vcd_not_available()
# check that lossless conversion works with 17 digits # check that lossless conversion works with 17 digits
@ -293,9 +293,9 @@ test_that("xgb.model.dt.tree works with and without feature names", {
# using integer node ID instead of character # using integer node ID instead of character
dt.tree.int <- xgb.model.dt.tree(model = bst.Tree, use_int_id = TRUE) dt.tree.int <- xgb.model.dt.tree(model = bst.Tree, use_int_id = TRUE)
expect_equal(as.integer(data.table::tstrsplit(dt.tree$Yes, '-')[[2]]), dt.tree.int$Yes) expect_equal(as.integer(data.table::tstrsplit(dt.tree$Yes, '-', fixed = TRUE)[[2]]), dt.tree.int$Yes)
expect_equal(as.integer(data.table::tstrsplit(dt.tree$No, '-')[[2]]), dt.tree.int$No) expect_equal(as.integer(data.table::tstrsplit(dt.tree$No, '-', fixed = TRUE)[[2]]), dt.tree.int$No)
expect_equal(as.integer(data.table::tstrsplit(dt.tree$Missing, '-')[[2]]), dt.tree.int$Missing) expect_equal(as.integer(data.table::tstrsplit(dt.tree$Missing, '-', fixed = TRUE)[[2]]), dt.tree.int$Missing)
}) })
test_that("xgb.model.dt.tree throws error for gblinear", { test_that("xgb.model.dt.tree throws error for gblinear", {

View File

@ -7,7 +7,7 @@ train <- train[, -1]
test <- test[, -1] test <- test[, -1]
y <- train[, ncol(train)] y <- train[, ncol(train)]
y <- gsub('Class_', '', y) y <- gsub('Class_', '', y, fixed = TRUE)
y <- as.integer(y) - 1 # xgboost take features in [0,numOfClass) y <- as.integer(y) - 1 # xgboost take features in [0,numOfClass)
x <- rbind(train[, -ncol(train)], test) x <- rbind(train[, -ncol(train)], test)

View File

@ -87,7 +87,7 @@ For that purpose, we will:
```{r classToIntegers} ```{r classToIntegers}
# Convert from classes to numbers # Convert from classes to numbers
y <- train[, nameLastCol, with = FALSE][[1]] %>% y <- train[, nameLastCol, with = FALSE][[1]] %>%
gsub('Class_', '', .) %>% gsub('Class_', '', ., fixed = TRUE) %>%
as.integer %>% as.integer %>%
subtract(., 1) subtract(., 1)

View File

@ -23,6 +23,7 @@ my_linters <- list(
brace_linter = lintr::brace_linter(), brace_linter = lintr::brace_linter(),
commas_linter = lintr::commas_linter(), commas_linter = lintr::commas_linter(),
equals_na = lintr::equals_na_linter(), equals_na = lintr::equals_na_linter(),
fixed_regex = lintr::fixed_regex_linter(),
infix_spaces_linter = lintr::infix_spaces_linter(), infix_spaces_linter = lintr::infix_spaces_linter(),
line_length_linter = lintr::line_length_linter(length = 150L), line_length_linter = lintr::line_length_linter(length = 150L),
no_tab_linter = lintr::no_tab_linter(), no_tab_linter = lintr::no_tab_linter(),