[R] replace uses of T and F with TRUE and FALSE (#5778)

* [R-package] replace uses of T and F with TRUE and FALSE

* enable linting

* Remove skip

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
James Lamb
2020-06-11 11:08:02 +01:00
committed by GitHub
parent cb7f7e542c
commit c35be9dc40
15 changed files with 32 additions and 33 deletions

View File

@@ -100,7 +100,7 @@ print(paste("test-error=", err))
# You can dump the tree you learned using xgb.dump into a text file
dump_path = file.path(tempdir(), 'dump.raw.txt')
xgb.dump(bst, dump_path, with_stats = T)
xgb.dump(bst, dump_path, with_stats = TRUE)
# Finally, you can check which features are the most important.
print("Most important features (look at column Gain):")

View File

@@ -9,7 +9,7 @@ require(e1071)
# Load Arthritis dataset in memory.
data(Arthritis)
# Create a copy of the dataset with data.table package (data.table is 100% compliant with R dataframe but its syntax is a lot more consistent and its performance are really good).
df <- data.table(Arthritis, keep.rownames = F)
df <- data.table(Arthritis, keep.rownames = FALSE)
# Let's add some new categorical features to see if it helps. Of course these feature are highly correlated to the Age feature. Usually it's not a good thing in ML, but Tree algorithms (including boosted trees) are able to select the best features, even in case of highly correlated features.
# For the first feature we create groups of age by rounding the real age. Note that we transform it to factor (categorical data) so the algorithm treat them as independant values.

View File

@@ -19,7 +19,7 @@ if (!require(vcd)) {
data(Arthritis)
# create a copy of the dataset with data.table package (data.table is 100% compliant with R dataframe but its syntax is a lot more consistent and its performance are really good).
df <- data.table(Arthritis, keep.rownames = F)
df <- data.table(Arthritis, keep.rownames = FALSE)
# Let's have a look to the data.table
cat("Print the dataset\n")

View File

@@ -19,18 +19,18 @@ treeInteractions <- function(input_tree, input_max_depth){
setorderv(parents_left, 'ID_merge')
setorderv(parents_right, 'ID_merge')
trees <- merge(trees, parents_left, by='ID_merge', all.x=T)
trees <- merge(trees, parents_left, by='ID_merge', all.x=TRUE)
trees[!is.na(i.id), c(paste0('parent_', i-1), paste0('parent_feat_', i-1)):=list(i.id, i.feature)]
trees[, c('i.id','i.feature'):=NULL]
trees <- merge(trees, parents_right, by='ID_merge', all.x=T)
trees <- merge(trees, parents_right, by='ID_merge', all.x=TRUE)
trees[!is.na(i.id), c(paste0('parent_', i-1), paste0('parent_feat_', i-1)):=list(i.id, i.feature)]
trees[, c('i.id','i.feature'):=NULL]
}
# Extract nodes with interactions
interaction_trees <- trees[!is.na(Split) & !is.na(parent_1),
c('Feature',paste0('parent_feat_',1:(input_max_depth-1))), with=F]
c('Feature',paste0('parent_feat_',1:(input_max_depth-1))), with=FALSE]
interaction_trees_split <- split(interaction_trees, 1:nrow(interaction_trees))
interaction_list <- lapply(interaction_trees_split, as.character)
@@ -96,7 +96,7 @@ x1 <- sort(unique(x[['V1']]))
for (i in 1:length(x1)){
testdata <- copy(x[, -c('V1')])
testdata[['V1']] <- x1[i]
testdata <- testdata[, paste0('V',1:10), with=F]
testdata <- testdata[, paste0('V',1:10), with=FALSE]
pred <- predict(bst3, as.matrix(testdata))
# Should not print out anything due to monotonic constraints

View File

@@ -13,7 +13,7 @@ exclude <- c('POLICYNO', 'PLCYDATE', 'CLM_FREQ5', 'CLM_AMT5', 'CLM_FLAG', 'IN_Y
# retains the missing values
# NOTE: this dataset is comes ready out of the box
options(na.action = 'na.pass')
x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = F])
x <- sparse.model.matrix(~ . - 1, data = dt[, -exclude, with = FALSE])
options(na.action = 'na.omit')
# response

View File

@@ -12,7 +12,7 @@ flag_32bit = .Machine$sizeof.pointer != 8
set.seed(1982)
data(Arthritis)
df <- data.table(Arthritis, keep.rownames = F)
df <- data.table(Arthritis, keep.rownames = FALSE)
df[,AgeDiscret := as.factor(round(Age / 10,0))]
df[,AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
df[,ID := NULL]
@@ -47,7 +47,7 @@ test_that("xgb.dump works", {
if (!flag_32bit)
expect_length(xgb.dump(bst.Tree), 200)
dump_file = file.path(tempdir(), 'xgb.model.dump')
expect_true(xgb.dump(bst.Tree, dump_file, with_stats = T))
expect_true(xgb.dump(bst.Tree, dump_file, with_stats = TRUE))
expect_true(file.exists(dump_file))
expect_gt(file.size(dump_file), 8000)
@@ -160,7 +160,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
objective = "reg:squarederror",
eval_metric = "rmse"),
if (booster == "dart")
list(rate_drop = .01, one_drop = T)),
list(rate_drop = .01, one_drop = TRUE)),
data = d,
label = y,
nrounds = nrounds)
@@ -168,8 +168,8 @@ test_that("SHAPs sum to predictions, with or without DART", {
pr <- function(...)
predict(fit, newdata = d, ...)
pred <- pr()
shap <- pr(predcontrib = T)
shapi <- pr(predinteraction = T)
shap <- pr(predcontrib = TRUE)
shapi <- pr(predinteraction = TRUE)
tol = 1e-5
expect_equal(rowSums(shap), pred, tol = tol)

View File

@@ -107,7 +107,7 @@ test_that("SHAP contribution values are not NAN", {
shaps <- as.data.frame(predict(fit,
newdata = as.matrix(subset(d, fold == 1)[, ivs]),
predcontrib = T))
predcontrib = TRUE))
result <- cbind(shaps, sum = rowSums(shaps), pred = predict(fit,
newdata = as.matrix(subset(d, fold == 1)[, ivs])))

View File

@@ -1,8 +1,6 @@
context("Code is of high quality and lint free")
test_that("Code Lint", {
skip_on_cran()
skip_on_travis()
skip_if_not_installed("lintr")
my_linters <- list(
absolute_paths_linter=lintr::absolute_paths_linter,
assignment_linter=lintr::assignment_linter,
@@ -21,7 +19,8 @@ test_that("Code Lint", {
spaces_inside_linter=lintr::spaces_inside_linter,
spaces_left_parentheses_linter=lintr::spaces_left_parentheses_linter,
trailing_blank_lines_linter=lintr::trailing_blank_lines_linter,
trailing_whitespace_linter=lintr::trailing_whitespace_linter
trailing_whitespace_linter=lintr::trailing_whitespace_linter,
true_false=lintr::T_and_F_symbol_linter
)
# lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality
lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality
})

View File

@@ -63,7 +63,7 @@ The first step is to load `Arthritis` dataset in memory and wrap it with `data.t
```{r, results='hide'}
data(Arthritis)
df <- data.table(Arthritis, keep.rownames = F)
df <- data.table(Arthritis, keep.rownames = FALSE)
```
> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`.

View File

@@ -363,7 +363,7 @@ xgb.plot.importance(importance_matrix = importance_matrix)
You can dump the tree you learned using `xgb.dump` into a text file.
```{r dump, message=T, warning=F}
xgb.dump(bst, with_stats = T)
xgb.dump(bst, with_stats = TRUE)
```
You can plot the trees from your model using ```xgb.plot.tree``