From c35be9dc40459d33eba2b66e06811fcb46fb327e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 11 Jun 2020 11:08:02 +0100 Subject: [PATCH] [R] replace uses of T and F with TRUE and FALSE (#5778) * [R-package] replace uses of T and F with TRUE and FALSE * enable linting * Remove skip Co-authored-by: Philip Hyunsu Cho --- R-package/demo/basic_walkthrough.R | 2 +- R-package/demo/caret_wrapper.R | 2 +- R-package/demo/create_sparse_matrix.R | 2 +- R-package/demo/interaction_constraints.R | 8 ++++---- R-package/demo/tweedie_regression.R | 2 +- R-package/tests/testthat/test_helpers.R | 10 +++++----- R-package/tests/testthat/test_interactions.R | 2 +- R-package/tests/testthat/test_lint.R | 7 +++---- R-package/vignettes/discoverYourData.Rmd | 2 +- R-package/vignettes/xgboostPresentation.Rmd | 2 +- demo/data/gen_autoclaims.R | 2 +- demo/kaggle-otto/otto_train_pred.R | 4 ++-- demo/kaggle-otto/understandingXGBoostModel.Rmd | 16 ++++++++-------- doc/R-package/discoverYourData.md | 2 +- doc/R-package/xgboostPresentation.md | 2 +- 15 files changed, 32 insertions(+), 33 deletions(-) diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R index bb6b85037..914d8b5a0 100644 --- a/R-package/demo/basic_walkthrough.R +++ b/R-package/demo/basic_walkthrough.R @@ -100,7 +100,7 @@ print(paste("test-error=", err)) # You can dump the tree you learned using xgb.dump into a text file dump_path = file.path(tempdir(), 'dump.raw.txt') -xgb.dump(bst, dump_path, with_stats = T) +xgb.dump(bst, dump_path, with_stats = TRUE) # Finally, you can check which features are the most important. print("Most important features (look at column Gain):") diff --git a/R-package/demo/caret_wrapper.R b/R-package/demo/caret_wrapper.R index 751b202b5..9ab5933d2 100644 --- a/R-package/demo/caret_wrapper.R +++ b/R-package/demo/caret_wrapper.R @@ -9,7 +9,7 @@ require(e1071) # Load Arthritis dataset in memory. data(Arthritis) # Create a copy of the dataset with data.table package (data.table is 100% compliant with R dataframe but its syntax is a lot more consistent and its performance are really good). -df <- data.table(Arthritis, keep.rownames = F) +df <- data.table(Arthritis, keep.rownames = FALSE) # Let's add some new categorical features to see if it helps. Of course these feature are highly correlated to the Age feature. Usually it's not a good thing in ML, but Tree algorithms (including boosted trees) are able to select the best features, even in case of highly correlated features. # For the first feature we create groups of age by rounding the real age. Note that we transform it to factor (categorical data) so the algorithm treat them as independant values. diff --git a/R-package/demo/create_sparse_matrix.R b/R-package/demo/create_sparse_matrix.R index 6069f33d4..63d1a5b2f 100644 --- a/R-package/demo/create_sparse_matrix.R +++ b/R-package/demo/create_sparse_matrix.R @@ -19,7 +19,7 @@ if (!require(vcd)) { data(Arthritis) # create a copy of the dataset with data.table package (data.table is 100% compliant with R dataframe but its syntax is a lot more consistent and its performance are really good). 
-df <- data.table(Arthritis, keep.rownames = F)
+df <- data.table(Arthritis, keep.rownames = FALSE)
 
 # Let's have a look to the data.table
 cat("Print the dataset\n")
diff --git a/R-package/demo/interaction_constraints.R b/R-package/demo/interaction_constraints.R
index 2f2edb155..41c32d0e8 100644
--- a/R-package/demo/interaction_constraints.R
+++ b/R-package/demo/interaction_constraints.R
@@ -19,18 +19,18 @@ treeInteractions <- function(input_tree, input_max_depth){
   setorderv(parents_left, 'ID_merge')
   setorderv(parents_right, 'ID_merge')
 
-  trees <- merge(trees, parents_left, by='ID_merge', all.x=T)
+  trees <- merge(trees, parents_left, by='ID_merge', all.x=TRUE)
   trees[!is.na(i.id), c(paste0('parent_', i-1), paste0('parent_feat_', i-1)):=list(i.id, i.feature)]
   trees[, c('i.id','i.feature'):=NULL]
 
-  trees <- merge(trees, parents_right, by='ID_merge', all.x=T)
+  trees <- merge(trees, parents_right, by='ID_merge', all.x=TRUE)
   trees[!is.na(i.id), c(paste0('parent_', i-1), paste0('parent_feat_', i-1)):=list(i.id, i.feature)]
   trees[, c('i.id','i.feature'):=NULL]
 }
 
 # Extract nodes with interactions
 interaction_trees <- trees[!is.na(Split) & !is.na(parent_1),
-                           c('Feature',paste0('parent_feat_',1:(input_max_depth-1))), with=F]
+                           c('Feature',paste0('parent_feat_',1:(input_max_depth-1))), with=FALSE]
 interaction_trees_split <- split(interaction_trees, 1:nrow(interaction_trees))
 interaction_list <- lapply(interaction_trees_split, as.character)
 
@@ -96,7 +96,7 @@ x1 <- sort(unique(x[['V1']]))
 for (i in 1:length(x1)){
   testdata <- copy(x[, -c('V1')])
   testdata[['V1']] <- x1[i]
-  testdata <- testdata[, paste0('V',1:10), with=F]
+  testdata <- testdata[, paste0('V',1:10), with=FALSE]
   pred <- predict(bst3, as.matrix(testdata))
 
   # Should not print out anything due to monotonic constraints
diff --git a/R-package/demo/tweedie_regression.R b/R-package/demo/tweedie_regression.R
index 4d272f696..8b84ed555 100755
--- a/R-package/demo/tweedie_regression.R
+++ b/R-package/demo/tweedie_regression.R
@@ -13,7 +13,7 @@ exclude <- c('POLICYNO', 'PLCYDATE', 'CLM_FREQ5', 'CLM_AMT5', 'CLM_FLAG', 'IN_Y
 # retains the missing values
 # NOTE: this dataset is comes ready out of the box
 options(na.action = 'na.pass')
-x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = F])
+x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = FALSE])
 options(na.action = 'na.omit')
 
 # response
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index 5c14d5318..4a44f333c 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -12,7 +12,7 @@ flag_32bit = .Machine$sizeof.pointer != 8
 
 set.seed(1982)
 data(Arthritis)
-df <- data.table(Arthritis, keep.rownames = F)
+df <- data.table(Arthritis, keep.rownames = FALSE)
 df[,AgeDiscret := as.factor(round(Age / 10,0))]
 df[,AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
 df[,ID := NULL]
@@ -47,7 +47,7 @@ test_that("xgb.dump works", {
   if (!flag_32bit)
     expect_length(xgb.dump(bst.Tree), 200)
   dump_file = file.path(tempdir(), 'xgb.model.dump')
-  expect_true(xgb.dump(bst.Tree, dump_file, with_stats = T))
+  expect_true(xgb.dump(bst.Tree, dump_file, with_stats = TRUE))
   expect_true(file.exists(dump_file))
   expect_gt(file.size(dump_file), 8000)
 
@@ -160,7 +160,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
         objective = "reg:squarederror",
         eval_metric = "rmse"),
       if (booster == "dart")
-        list(rate_drop = .01, one_drop = T)),
+        list(rate_drop = .01, one_drop = TRUE)),
     data = d,
     label = y,
     nrounds = nrounds)
@@ -168,8 +168,8 @@ test_that("SHAPs sum to predictions, with or without DART", {
 
   pr <- function(...) predict(fit, newdata = d, ...)
   pred <- pr()
-  shap <- pr(predcontrib = T)
-  shapi <- pr(predinteraction = T)
+  shap <- pr(predcontrib = TRUE)
+  shapi <- pr(predinteraction = TRUE)
   tol = 1e-5
 
   expect_equal(rowSums(shap), pred, tol = tol)
diff --git a/R-package/tests/testthat/test_interactions.R b/R-package/tests/testthat/test_interactions.R
index 20ee90c2b..ee35339a3 100644
--- a/R-package/tests/testthat/test_interactions.R
+++ b/R-package/tests/testthat/test_interactions.R
@@ -107,7 +107,7 @@ test_that("SHAP contribution values are not NAN", {
 
   shaps <- as.data.frame(predict(fit,
                                  newdata = as.matrix(subset(d, fold == 1)[, ivs]),
-                                 predcontrib = T))
+                                 predcontrib = TRUE))
   result <- cbind(shaps, sum = rowSums(shaps), pred = predict(fit,
                                                               newdata = as.matrix(subset(d, fold == 1)[, ivs])))
 
diff --git a/R-package/tests/testthat/test_lint.R b/R-package/tests/testthat/test_lint.R
index 2f2a07d54..168eb0fc8 100644
--- a/R-package/tests/testthat/test_lint.R
+++ b/R-package/tests/testthat/test_lint.R
@@ -1,8 +1,6 @@
 context("Code is of high quality and lint free")
 test_that("Code Lint", {
   skip_on_cran()
-  skip_on_travis()
-  skip_if_not_installed("lintr")
   my_linters <- list(
     absolute_paths_linter=lintr::absolute_paths_linter,
     assignment_linter=lintr::assignment_linter,
@@ -21,7 +19,8 @@ test_that("Code Lint", {
     spaces_inside_linter=lintr::spaces_inside_linter,
     spaces_left_parentheses_linter=lintr::spaces_left_parentheses_linter,
     trailing_blank_lines_linter=lintr::trailing_blank_lines_linter,
-    trailing_whitespace_linter=lintr::trailing_whitespace_linter
+    trailing_whitespace_linter=lintr::trailing_whitespace_linter,
+    true_false=lintr::T_and_F_symbol_linter
   )
-  # lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality
+  lintr::expect_lint_free(linters=my_linters)
 })
diff --git a/R-package/vignettes/discoverYourData.Rmd b/R-package/vignettes/discoverYourData.Rmd
index 67b734009..8181fcbb9 100644
--- a/R-package/vignettes/discoverYourData.Rmd
+++ b/R-package/vignettes/discoverYourData.Rmd
@@ -63,7 +63,7 @@ The first step is to load `Arthritis` dataset in memory and wrap it with `data.t
 
 ```{r, results='hide'}
 data(Arthritis)
-df <- data.table(Arthritis, keep.rownames = F)
+df <- data.table(Arthritis, keep.rownames = FALSE)
 ```
 
 > `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.
diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd
index 6d1bab706..c2f990e14 100644
--- a/R-package/vignettes/xgboostPresentation.Rmd
+++ b/R-package/vignettes/xgboostPresentation.Rmd
@@ -363,7 +363,7 @@ xgb.plot.importance(importance_matrix = importance_matrix)
 You can dump the tree you learned using `xgb.dump` into a text file.
 
 ```{r dump, message=T, warning=F}
-xgb.dump(bst, with_stats = T)
+xgb.dump(bst, with_stats = TRUE)
 ```
 
 You can plot the trees from your model using ```xgb.plot.tree``
diff --git a/demo/data/gen_autoclaims.R b/demo/data/gen_autoclaims.R
index 5465db09c..4723c1dd0 100644
--- a/demo/data/gen_autoclaims.R
+++ b/demo/data/gen_autoclaims.R
@@ -14,5 +14,5 @@ data$STATE = as.factor(data$STATE)
 data$CLASS = as.factor(data$CLASS)
 data$GENDER = as.factor(data$GENDER)
 
-data.dummy <- dummy.data.frame(data, dummy.class='factor', omit.constants=T);
+data.dummy <- dummy.data.frame(data, dummy.class='factor', omit.constants=TRUE);
 write.table(data.dummy, 'autoclaims.csv', sep=',', row.names=F, col.names=F, quote=F)
diff --git a/demo/kaggle-otto/otto_train_pred.R b/demo/kaggle-otto/otto_train_pred.R
index ec0f85ea7..02989db9b 100644
--- a/demo/kaggle-otto/otto_train_pred.R
+++ b/demo/kaggle-otto/otto_train_pred.R
@@ -1,8 +1,8 @@
 require(xgboost)
 require(methods)
 
-train = read.csv('data/train.csv',header=TRUE,stringsAsFactors = F)
-test = read.csv('data/test.csv',header=TRUE,stringsAsFactors = F)
+train = read.csv('data/train.csv',header=TRUE,stringsAsFactors = FALSE)
+test = read.csv('data/test.csv',header=TRUE,stringsAsFactors = FALSE)
 train = train[,-1]
 test = test[,-1]
diff --git a/demo/kaggle-otto/understandingXGBoostModel.Rmd b/demo/kaggle-otto/understandingXGBoostModel.Rmd
index b37c407fa..c5776e005 100644
--- a/demo/kaggle-otto/understandingXGBoostModel.Rmd
+++ b/demo/kaggle-otto/understandingXGBoostModel.Rmd
@@ -30,8 +30,8 @@ require(xgboost)
 require(methods)
 require(data.table)
 require(magrittr)
-train <- fread('data/train.csv', header = T, stringsAsFactors = F)
-test <- fread('data/test.csv', header=TRUE, stringsAsFactors = F)
+train <- fread('data/train.csv', header = TRUE, stringsAsFactors = FALSE)
+test <- fread('data/test.csv', header=TRUE, stringsAsFactors = FALSE)
 ```
 
 > `magrittr` and `data.table` are here to make the code cleaner and much more rapid.
@@ -43,13 +43,13 @@ Let's explore the dataset.
dim(train) # Training content -train[1:6,1:5, with =F] +train[1:6,1:5, with =FALSE] # Test dataset dimensions dim(test) # Test content -test[1:6,1:5, with =F] +test[1:6,1:5, with =FALSE] ``` > We only display the 6 first rows and 5 first columns for convenience @@ -70,7 +70,7 @@ According to its description, the **Otto** challenge is a multi class classifica ```{r searchLabel} # Check the content of the last column -train[1:6, ncol(train), with = F] +train[1:6, ncol(train), with = FALSE] # Save the name of the last column nameLastCol <- names(train)[ncol(train)] ``` @@ -86,7 +86,7 @@ For that purpose, we will: ```{r classToIntegers} # Convert from classes to numbers -y <- train[, nameLastCol, with = F][[1]] %>% gsub('Class_','',.) %>% {as.integer(.) -1} +y <- train[, nameLastCol, with = FALSE][[1]] %>% gsub('Class_','',.) %>% {as.integer(.) -1} # Display the first 5 levels y[1:5] @@ -95,7 +95,7 @@ y[1:5] We remove label column from training dataset, otherwise **XGBoost** would use it to guess the labels! ```{r deleteCols, results='hide'} -train[, nameLastCol:=NULL, with = F] +train[, nameLastCol:=NULL, with = FALSE] ``` `data.table` is an awesome implementation of data.frame, unfortunately it is not a format supported natively by **XGBoost**. We need to convert both datasets (training and test) in `numeric` Matrix format. @@ -163,7 +163,7 @@ Each *split* is done on one feature only at one value. Let's see what the model looks like. ```{r modelDump} -model <- xgb.dump(bst, with.stats = T) +model <- xgb.dump(bst, with.stats = TRUE) model[1:10] ``` > For convenience, we are displaying the first 10 lines of the model only. diff --git a/doc/R-package/discoverYourData.md b/doc/R-package/discoverYourData.md index bffdcd559..de7b2823e 100644 --- a/doc/R-package/discoverYourData.md +++ b/doc/R-package/discoverYourData.md @@ -52,7 +52,7 @@ The first step is to load `Arthritis` dataset in memory and wrap it with `data.t ```r data(Arthritis) -df <- data.table(Arthritis, keep.rownames = F) +df <- data.table(Arthritis, keep.rownames = FALSE) ``` > `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`. diff --git a/doc/R-package/xgboostPresentation.md b/doc/R-package/xgboostPresentation.md index 5b9d63599..1e01b099f 100644 --- a/doc/R-package/xgboostPresentation.md +++ b/doc/R-package/xgboostPresentation.md @@ -489,7 +489,7 @@ You can dump the tree you learned using `xgb.dump` into a text file. ```r -xgb.dump(bst, with_stats = T) +xgb.dump(bst, with_stats = TRUE) ``` ```
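
Why the replacement matters: in R, `TRUE` and `FALSE` are reserved words, whereas `T` and `F` are ordinary variables that merely default to those values, so any code in scope can rebind them and silently change behaviour. A minimal base-R illustration of the hazard this patch removes (not part of the commit itself):

```r
# T and F are plain bindings to TRUE and FALSE, not constants;
# a single assignment is enough to subvert them.
F <- TRUE
if (F) message("F is TRUE here, so `keep.rownames = F` would keep row names")

# TRUE and FALSE are reserved words and cannot be reassigned:
# TRUE <- 1  # Error: invalid (do_set) left-hand side to assignment

rm(F)  # remove the shadowing variable; F again resolves to base::F (FALSE)
```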
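The `true_false` entry added to `my_linters` wires lintr's `T_and_F_symbol_linter` into the package's lint test, and with `expect_lint_free()` now enabled any new `T`/`F` literal fails the suite. A sketch of running that single check in isolation, assuming the lintr 2.x API that `test_lint.R` uses (linters referenced as objects rather than factory calls):

```r
library(lintr)

# Lint a throwaway file that still uses the F shorthand.
snippet <- tempfile(fileext = ".R")
writeLines("df <- data.table(Arthritis, keep.rownames = F)", snippet)

# Apply only the linter enabled by this patch; it flags the
# F symbol and recommends spelling out FALSE.
lint(snippet, linters = list(true_false = lintr::T_and_F_symbol_linter))
```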