[R] discourage use of regex for fixed string comparisons (#8736)

This commit is contained in:
James Lamb 2023-01-30 04:47:21 -06:00 committed by GitHub
parent 1325ba9251
commit 0d8248ddcd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 20 additions and 19 deletions

View File

@ -114,7 +114,7 @@ cb.evaluation.log <- function() {
if (is.null(mnames) || any(mnames == ""))
stop("bst_evaluation must have non-empty names")
mnames <<- gsub('-', '_', names(env$bst_evaluation))
mnames <<- gsub('-', '_', names(env$bst_evaluation), fixed = TRUE)
if (!is.null(env$bst_evaluation_err))
mnames <<- c(paste0(mnames, '_mean'), paste0(mnames, '_std'))
}
@ -185,7 +185,7 @@ cb.reset.parameters <- function(new_params) {
if (typeof(new_params) != "list")
stop("'new_params' must be a list")
pnames <- gsub("\\.", "_", names(new_params))
pnames <- gsub(".", "_", names(new_params), fixed = TRUE)
nrounds <- NULL
# run some checks in the beginning
@ -300,9 +300,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
if (length(env$bst_evaluation) == 0)
stop("For early stopping, watchlist must have at least one element")
eval_names <- gsub('-', '_', names(env$bst_evaluation))
eval_names <- gsub('-', '_', names(env$bst_evaluation), fixed = TRUE)
if (!is.null(metric_name)) {
metric_idx <<- which(gsub('-', '_', metric_name) == eval_names)
metric_idx <<- which(gsub('-', '_', metric_name, fixed = TRUE) == eval_names)
if (length(metric_idx) == 0)
stop("'metric_name' for early stopping is not one of the following:\n",
paste(eval_names, collapse = ' '), '\n')

View File

@ -38,11 +38,11 @@ check.booster.params <- function(params, ...) {
stop("params must be a list")
# in R interface, allow for '.' instead of '_' in parameter names
names(params) <- gsub("\\.", "_", names(params))
names(params) <- gsub(".", "_", names(params), fixed = TRUE)
# merge parameters from the params and the dots-expansion
dot_params <- list(...)
names(dot_params) <- gsub("\\.", "_", names(dot_params))
names(dot_params) <- gsub(".", "_", names(dot_params), fixed = TRUE)
if (length(intersect(names(params),
names(dot_params))) > 0)
stop("Same parameters in 'params' and in the call are not allowed. Please check your 'params' list.")

View File

@ -672,7 +672,7 @@ xgb.config <- function(object) {
if (is.null(names(p)) || any(nchar(names(p)) == 0)) {
stop("parameter names cannot be empty strings")
}
names(p) <- gsub("\\.", "_", names(p))
names(p) <- gsub(".", "_", names(p), fixed = TRUE)
p <- lapply(p, function(x) as.character(x)[1])
handle <- xgb.get.handle(object)
for (i in seq_along(p)) {

View File

@ -79,9 +79,9 @@ end_of_table <- empty_lines[empty_lines > start_index][1L]
# Read the contents of the table
exported_symbols <- objdump_results[(start_index + 1L):end_of_table]
exported_symbols <- gsub("\t", "", exported_symbols)
exported_symbols <- gsub("\t", "", exported_symbols, fixed = TRUE)
exported_symbols <- gsub(".*\\] ", "", exported_symbols)
exported_symbols <- gsub(" ", "", exported_symbols)
exported_symbols <- gsub(" ", "", exported_symbols, fixed = TRUE)
# Write R.def file
writeLines(

View File

@ -63,7 +63,7 @@ test_that("xgb.dump works", {
dmp <- xgb.dump(bst.Tree, dump_format = "json")
expect_length(dmp, 1)
if (!flag_32bit)
expect_length(grep('nodeid', strsplit(dmp, '\n')[[1]]), 188)
expect_length(grep('nodeid', strsplit(dmp, '\n', fixed = TRUE)[[1]], fixed = TRUE), 188)
})
test_that("xgb.dump works for gblinear", {
@ -80,7 +80,7 @@ test_that("xgb.dump works for gblinear", {
# JSON format
dmp <- xgb.dump(bst.GLM.sp, dump_format = "json")
expect_length(dmp, 1)
expect_length(grep('\\d', strsplit(dmp, '\n')[[1]]), 11)
expect_length(grep('\\d', strsplit(dmp, '\n', fixed = TRUE)[[1]]), 11)
})
test_that("predict leafs works", {
@ -231,9 +231,9 @@ test_that("xgb-attribute functionality", {
expect_null(xgb.attributes(bst))
})
if (grepl('Windows', Sys.info()[['sysname']]) ||
grepl('Linux', Sys.info()[['sysname']]) ||
grepl('Darwin', Sys.info()[['sysname']])) {
if (grepl('Windows', Sys.info()[['sysname']], fixed = TRUE) ||
grepl('Linux', Sys.info()[['sysname']], fixed = TRUE) ||
grepl('Darwin', Sys.info()[['sysname']], fixed = TRUE)) {
test_that("xgb-attribute numeric precision", {
.skip_if_vcd_not_available()
# check that lossless conversion works with 17 digits
@ -293,9 +293,9 @@ test_that("xgb.model.dt.tree works with and without feature names", {
# using integer node ID instead of character
dt.tree.int <- xgb.model.dt.tree(model = bst.Tree, use_int_id = TRUE)
expect_equal(as.integer(data.table::tstrsplit(dt.tree$Yes, '-')[[2]]), dt.tree.int$Yes)
expect_equal(as.integer(data.table::tstrsplit(dt.tree$No, '-')[[2]]), dt.tree.int$No)
expect_equal(as.integer(data.table::tstrsplit(dt.tree$Missing, '-')[[2]]), dt.tree.int$Missing)
expect_equal(as.integer(data.table::tstrsplit(dt.tree$Yes, '-', fixed = TRUE)[[2]]), dt.tree.int$Yes)
expect_equal(as.integer(data.table::tstrsplit(dt.tree$No, '-', fixed = TRUE)[[2]]), dt.tree.int$No)
expect_equal(as.integer(data.table::tstrsplit(dt.tree$Missing, '-', fixed = TRUE)[[2]]), dt.tree.int$Missing)
})
test_that("xgb.model.dt.tree throws error for gblinear", {

View File

@ -7,7 +7,7 @@ train <- train[, -1]
test <- test[, -1]
y <- train[, ncol(train)]
y <- gsub('Class_', '', y)
y <- gsub('Class_', '', y, fixed = TRUE)
y <- as.integer(y) - 1 # xgboost take features in [0,numOfClass)
x <- rbind(train[, -ncol(train)], test)

View File

@ -87,7 +87,7 @@ For that purpose, we will:
```{r classToIntegers}
# Convert from classes to numbers
y <- train[, nameLastCol, with = FALSE][[1]] %>%
gsub('Class_', '', .) %>%
gsub('Class_', '', ., fixed = TRUE) %>%
as.integer %>%
subtract(., 1)

View File

@ -23,6 +23,7 @@ my_linters <- list(
brace_linter = lintr::brace_linter(),
commas_linter = lintr::commas_linter(),
equals_na = lintr::equals_na_linter(),
fixed_regex = lintr::fixed_regex_linter(),
infix_spaces_linter = lintr::infix_spaces_linter(),
line_length_linter = lintr::line_length_linter(length = 150L),
no_tab_linter = lintr::no_tab_linter(),