From 17ce1f26c86596a3987f92199b23ddd517ea3815 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 17 Dec 2022 04:36:14 -0600 Subject: [PATCH] [R] address some lintr warnings (#8609) --- R-package/R/callbacks.R | 2 +- R-package/R/xgb.Booster.R | 2 +- R-package/R/xgb.cv.R | 6 +-- R-package/R/xgb.dump.R | 2 +- R-package/demo/caret_wrapper.R | 2 +- R-package/tests/testthat/test_basic.R | 2 +- R-package/tests/testthat/test_dmatrix.R | 2 +- R-package/tests/testthat/test_helpers.R | 2 +- R-package/vignettes/discoverYourData.Rmd | 24 ++++++----- R-package/vignettes/xgboostPresentation.Rmd | 20 ++++----- R-package/vignettes/xgboostfromJSON.Rmd | 36 ++++++++-------- demo/data/gen_autoclaims.R | 25 ++++++++--- demo/kaggle-higgs/higgs-pred.R | 10 ++--- demo/kaggle-higgs/higgs-train.R | 8 ++-- demo/kaggle-higgs/speedtest.R | 43 +++++++++---------- demo/kaggle-otto/otto_train_pred.R | 43 +++++++++++-------- .../kaggle-otto/understandingXGBoostModel.Rmd | 20 ++++----- demo/multiclass_classification/train.R | 4 +- 18 files changed, 137 insertions(+), 116 deletions(-) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 957fca6d4..3c4d2e13a 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -611,7 +611,7 @@ cb.cv.predict <- function(save_models = FALSE) { #' matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l') #' #' @export -cb.gblinear.history <- function(sparse=FALSE) { +cb.gblinear.history <- function(sparse = FALSE) { coefs <- NULL init <- function(env) { diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 2f1f5091c..46852cba8 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -629,7 +629,7 @@ xgb.attributes <- function(object) { #' @export xgb.config <- function(object) { handle <- xgb.get.handle(object) - .Call(XGBoosterSaveJsonConfig_R, handle); + .Call(XGBoosterSaveJsonConfig_R, handle) } #' @rdname xgb.config diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index f428f4285..27730cbc3 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -119,10 +119,10 @@ #' print(cv, verbose=TRUE) #' #' @export -xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NA, - prediction = FALSE, showsd = TRUE, metrics=list(), +xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing = NA, + prediction = FALSE, showsd = TRUE, metrics = list(), obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, train_folds = NULL, - verbose = TRUE, print_every_n=1L, + verbose = TRUE, print_every_n = 1L, early_stopping_rounds = NULL, maximize = NULL, callbacks = list(), ...) { check.deprecation(...) diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index b5b8b7eb4..a2de26c26 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -38,7 +38,7 @@ #' cat(xgb.dump(bst, with_stats = TRUE, dump_format='json')) #' #' @export -xgb.dump <- function(model, fname = NULL, fmap = "", with_stats=FALSE, +xgb.dump <- function(model, fname = NULL, fmap = "", with_stats = FALSE, dump_format = c("text", "json"), ...) { check.deprecation(...) dump_format <- match.arg(dump_format) diff --git a/R-package/demo/caret_wrapper.R b/R-package/demo/caret_wrapper.R index cace1e469..0e63f27ce 100644 --- a/R-package/demo/caret_wrapper.R +++ b/R-package/demo/caret_wrapper.R @@ -34,7 +34,7 @@ df[, ID := NULL] # Here we use 10-fold cross-validation, repeating twice, and using random search for tuning hyper-parameters. 
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 2, search = "random") # train a xgbTree model using caret::train -model <- train(factor(Improved)~., data = df, method = "xgbTree", trControl = fitControl) +model <- train(factor(Improved) ~ ., data = df, method = "xgbTree", trControl = fitControl) # Instead of tree for our boosters, you can also fit a linear regression or logistic regression model # using xgbLinear diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index ad8c8a830..4420c9105 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -404,7 +404,7 @@ test_that("Configuration works", { config <- xgb.config(bst) xgb.config(bst) <- config reloaded_config <- xgb.config(bst) - expect_equal(config, reloaded_config); + expect_equal(config, reloaded_config) }) test_that("strict_shape works", { diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R index eb83544d8..74d172f69 100644 --- a/R-package/tests/testthat/test_dmatrix.R +++ b/R-package/tests/testthat/test_dmatrix.R @@ -53,7 +53,7 @@ test_that("xgb.DMatrix: saving, loading", { dtrain <- xgb.DMatrix(tmp_file) expect_equal(colnames(dtrain), cnames) - ft <- rep(c("c", "q"), each=length(cnames)/2) + ft <- rep(c("c", "q"), each = length(cnames) / 2) setinfo(dtrain, "feature_type", ft) expect_equal(ft, getinfo(dtrain, "feature_type")) }) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index dc61fd1f8..2eab5624f 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -440,7 +440,7 @@ test_that("xgb.plot.shap.summary works", { }) test_that("check.deprecation works", { - ttt <- function(a = NNULL, DUMMY=NULL, ...) { + ttt <- function(a = NNULL, DUMMY = NULL, ...) { check.deprecation(...) as.list((environment())) } diff --git a/R-package/vignettes/discoverYourData.Rmd b/R-package/vignettes/discoverYourData.Rmd index 4a4e1811a..c939232a1 100644 --- a/R-package/vignettes/discoverYourData.Rmd +++ b/R-package/vignettes/discoverYourData.Rmd @@ -28,7 +28,9 @@ Package loading: require(xgboost) require(Matrix) require(data.table) -if (!require('vcd')) install.packages('vcd') +if (!require('vcd')) { + install.packages('vcd') +} ``` > **VCD** package is used for one of its embedded dataset only. @@ -100,7 +102,7 @@ Note that we transform it to `factor` so the algorithm treat these age groups as Therefore, 20 is not closer to 30 than 60. To make it short, the distance between ages is lost in this transformation. ```{r} -head(df[,AgeDiscret := as.factor(round(Age/10,0))]) +head(df[, AgeDiscret := as.factor(round(Age / 10, 0))]) ``` ##### Random split into two groups @@ -108,7 +110,7 @@ head(df[,AgeDiscret := as.factor(round(Age/10,0))]) Following is an even stronger simplification of the real age with an arbitrary split at 30 years old. We choose this value **based on nothing**. We will see later if simplifying the information based on arbitrary values is a good strategy (you may already have an idea of how well it will work...). ```{r} -head(df[,AgeCat:= as.factor(ifelse(Age > 30, "Old", "Young"))]) +head(df[, AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]) ``` ##### Risks in adding correlated features @@ -124,13 +126,13 @@ Fortunately, decision tree algorithms (including boosted trees) are very robust We remove ID as there is nothing to learn from this feature (it would just add some noise). 
```{r, results='hide'} -df[,ID:=NULL] +df[, ID := NULL] ``` We will list the different values for the column `Treatment`: ```{r} -levels(df[,Treatment]) +levels(df[, Treatment]) ``` @@ -147,7 +149,7 @@ For example, the column `Treatment` will be replaced by two columns, `TreatmentP Column `Improved` is excluded because it will be our `label` column, the one we want to predict. ```{r, warning=FALSE,message=FALSE} -sparse_matrix <- sparse.model.matrix(Improved ~ ., data = df)[,-1] +sparse_matrix <- sparse.model.matrix(Improved ~ ., data = df)[, -1] head(sparse_matrix) ``` @@ -156,7 +158,7 @@ head(sparse_matrix) Create the output `numeric` vector (not as a sparse `Matrix`): ```{r} -output_vector <- df[,Improved] == "Marked" +output_vector <- df[, Improved] == "Marked" ``` 1. set `Y` vector to `0`; @@ -170,7 +172,7 @@ The code below is very usual. For more information, you can look at the document ```{r} bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4, - eta = 1, nthread = 2, nrounds = 10,objective = "binary:logistic") + eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic") ``` @@ -219,7 +221,7 @@ For that purpose we will execute the same function as above but using two more p importanceRaw <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst, data = sparse_matrix, label = output_vector) # Cleaning for better display -importanceClean <- importanceRaw[,`:=`(Cover=NULL, Frequency=NULL)] +importanceClean <- importanceRaw[, `:=`(Cover = NULL, Frequency = NULL)] head(importanceClean) ``` @@ -321,8 +323,8 @@ If you want to try Random Forests algorithm, you can tweak XGBoost parameters! For instance, to compute a model with 1000 trees, with a 0.5 factor on sampling rows and columns: ```{r, warning=FALSE, message=FALSE} -data(agaricus.train, package='xgboost') -data(agaricus.test, package='xgboost') +data(agaricus.train, package = 'xgboost') +data(agaricus.test, package = 'xgboost') train <- agaricus.train test <- agaricus.test diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd index 64038e02d..34f5963d5 100644 --- a/R-package/vignettes/xgboostPresentation.Rmd +++ b/R-package/vignettes/xgboostPresentation.Rmd @@ -52,9 +52,9 @@ It has several features: For weekly updated version (highly recommended), install from *GitHub*: ```{r installGithub, eval=FALSE} -install.packages("drat", repos="https://cran.rstudio.com") +install.packages("drat", repos = "https://cran.rstudio.com") drat:::addRepo("dmlc") -install.packages("xgboost", repos="http://dmlc.ml/drat/", type = "source") +install.packages("xgboost", repos = "http://dmlc.ml/drat/", type = "source") ``` > *Windows* user will need to install [Rtools](https://cran.r-project.org/bin/windows/Rtools/) first. @@ -101,8 +101,8 @@ Why *split* the dataset in two parts? In the first part we will build our model. In the second part we will want to test it and assess its quality. Without dividing the dataset we would test the model on the data which the algorithm have already seen. ```{r datasetLoading, results='hold', message=F, warning=F} -data(agaricus.train, package='xgboost') -data(agaricus.test, package='xgboost') +data(agaricus.train, package = 'xgboost') +data(agaricus.test, package = 'xgboost') train <- agaricus.train test <- agaricus.test ``` @@ -314,8 +314,8 @@ Most of the features below have been implemented to help you to improve your mod For the following advanced features, we need to put data in `xgb.DMatrix` as explained above. 
```{r DMatrix, message=F, warning=F} -dtrain <- xgb.DMatrix(data = train$data, label=train$label) -dtest <- xgb.DMatrix(data = test$data, label=test$label) +dtrain <- xgb.DMatrix(data = train$data, label = train$label) +dtest <- xgb.DMatrix(data = test$data, label = test$label) ``` ### Measure learning progress with xgb.train @@ -332,7 +332,7 @@ One way to measure progress in learning of a model is to provide to **XGBoost** For the purpose of this example, we use `watchlist` parameter. It is a list of `xgb.DMatrix`, each of them tagged with a name. ```{r watchlist, message=F, warning=F} -watchlist <- list(train=dtrain, test=dtest) +watchlist <- list(train = dtrain, test = dtest) bst <- xgb.train( data = dtrain @@ -425,7 +425,7 @@ Information can be extracted from `xgb.DMatrix` using `getinfo` function. Hereaf ```{r getinfo, message=F, warning=F} label <- getinfo(dtest, "label") pred <- predict(bst, dtest) -err <- as.numeric(sum(as.integer(pred > 0.5) != label))/length(label) +err <- as.numeric(sum(as.integer(pred > 0.5) != label)) / length(label) print(paste("test-error=", err)) ``` @@ -479,7 +479,7 @@ bst2 <- xgb.load("xgboost.model") pred2 <- predict(bst2, test$data) # And now the test -print(paste("sum(abs(pred2-pred))=", sum(abs(pred2-pred)))) +print(paste("sum(abs(pred2-pred))=", sum(abs(pred2 - pred)))) ``` ```{r clean, include=FALSE} @@ -503,7 +503,7 @@ bst3 <- xgb.load(rawVec) pred3 <- predict(bst3, test$data) # pred2 should be identical to pred -print(paste("sum(abs(pred3-pred))=", sum(abs(pred2-pred)))) +print(paste("sum(abs(pred3-pred))=", sum(abs(pred2 - pred)))) ``` > Again `0`? It seems that `XGBoost` works pretty well! diff --git a/R-package/vignettes/xgboostfromJSON.Rmd b/R-package/vignettes/xgboostfromJSON.Rmd index 544186830..0ea2f56cb 100644 --- a/R-package/vignettes/xgboostfromJSON.Rmd +++ b/R-package/vignettes/xgboostfromJSON.Rmd @@ -30,7 +30,7 @@ For the purpose of this tutorial we will load the xgboost, jsonlite, and float p require(xgboost) require(jsonlite) require(float) -options(digits=22) +options(digits = 22) ``` We will create a toy binary logistic model based on the example first provided [here](https://github.com/dmlc/xgboost/issues/3960), so that we can easily understand the structure of the dumped JSON model object. This will allow us to understand where discrepancies can occur and how they should be handled. @@ -50,10 +50,10 @@ labels <- c(1, 1, 1, 0, 0, 0, 0, 0, 0) -data <- data.frame(dates = dates, labels=labels) +data <- data.frame(dates = dates, labels = labels) bst <- xgboost( - data = as.matrix(data$dates), + data = as.matrix(data$dates), label = labels, nthread = 2, nrounds = 1, @@ -69,7 +69,7 @@ We will now dump the model to JSON and attempt to illustrate a variety of issues First let's dump the model to JSON: ```{r} -bst_json <- xgb.dump(bst, with_stats = FALSE, dump_format='json') +bst_json <- xgb.dump(bst, with_stats = FALSE, dump_format = 'json') bst_from_json <- fromJSON(bst_json, simplifyDataFrame = FALSE) node <- bst_from_json[[1]] cat(bst_json) @@ -78,10 +78,10 @@ cat(bst_json) The tree JSON shown by the above code-chunk tells us that if the data is less than 20180132, the tree will output the value in the first leaf. Otherwise it will output the value in the second leaf. Let's try to reproduce this manually with the data we have and confirm that it matches the model predictions we've already calculated. 
```{r} -bst_preds_logodds <- predict(bst,as.matrix(data$dates), outputmargin = TRUE) +bst_preds_logodds <- predict(bst, as.matrix(data$dates), outputmargin = TRUE) # calculate the logodds values using the JSON representation -bst_from_json_logodds <- ifelse(data$dates ntop, "s", "b") outdata <- list("EventId" = idx, "RankOrder" = rorder, "Class" = plabel) -write.csv(outdata, file = outfile, quote=FALSE, row.names=FALSE) +write.csv(outdata, file = outfile, quote = FALSE, row.names = FALSE) diff --git a/demo/kaggle-higgs/higgs-train.R b/demo/kaggle-higgs/higgs-train.R index 54e36a73f..6f37040f8 100644 --- a/demo/kaggle-higgs/higgs-train.R +++ b/demo/kaggle-higgs/higgs-train.R @@ -4,14 +4,14 @@ require(methods) testsize <- 550000 -dtrain <- read.csv("data/training.csv", header=TRUE) +dtrain <- read.csv("data/training.csv", header = TRUE) dtrain[33] <- dtrain[33] == "s" label <- as.numeric(dtrain[[33]]) data <- as.matrix(dtrain[2:31]) weight <- as.numeric(dtrain[[32]]) * testsize / length(label) -sumwpos <- sum(weight * (label==1.0)) -sumwneg <- sum(weight * (label==0.0)) +sumwpos <- sum(weight * (label == 1.0)) +sumwneg <- sum(weight * (label == 0.0)) print(paste("weight statistics: wpos=", sumwpos, "wneg=", sumwneg, "ratio=", sumwneg / sumwpos)) xgmat <- xgb.DMatrix(data, label = label, weight = weight, missing = -999.0) @@ -25,7 +25,7 @@ param <- list("objective" = "binary:logitraw", watchlist <- list("train" = xgmat) nrounds <- 120 print ("loading data end, start to boost trees") -bst <- xgb.train(param, xgmat, nrounds, watchlist ); +bst <- xgb.train(param, xgmat, nrounds, watchlist) # save out model xgb.save(bst, "higgs.model") print ('finish training') diff --git a/demo/kaggle-higgs/speedtest.R b/demo/kaggle-higgs/speedtest.R index 12924aa54..2794fdd75 100644 --- a/demo/kaggle-higgs/speedtest.R +++ b/demo/kaggle-higgs/speedtest.R @@ -5,10 +5,10 @@ require(methods) testsize <- 550000 -dtrain <- read.csv("data/training.csv", header=TRUE, nrows=350001) -dtrain$Label <- as.numeric(dtrain$Label=='s') +dtrain <- read.csv("data/training.csv", header = TRUE, nrows = 350001) +dtrain$Label <- as.numeric(dtrain$Label == 's') # gbm.time = system.time({ -# gbm.model <- gbm(Label ~ ., data = dtrain[, -c(1,32)], n.trees = 120, +# gbm.model <- gbm(Label ~ ., data = dtrain[, -c(1,32)], n.trees = 120, # interaction.depth = 6, shrinkage = 0.1, bag.fraction = 1, # verbose = TRUE) # }) @@ -20,12 +20,12 @@ dtrain$Label <- as.numeric(dtrain$Label=='s') data <- as.matrix(dtrain[2:31]) weight <- as.numeric(dtrain[[32]]) * testsize / length(label) -sumwpos <- sum(weight * (label==1.0)) -sumwneg <- sum(weight * (label==0.0)) +sumwpos <- sum(weight * (label == 1.0)) +sumwneg <- sum(weight * (label == 0.0)) print(paste("weight statistics: wpos=", sumwpos, "wneg=", sumwneg, "ratio=", sumwneg / sumwpos)) xgboost.time <- list() -threads <- c(1,2,4,8,16) +threads <- c(1, 2, 4, 8, 16) for (i in 1:length(threads)){ thread <- threads[i] xgboost.time[[i]] <- system.time({ @@ -40,7 +40,7 @@ for (i in 1:length(threads)){ watchlist <- list("train" = xgmat) nrounds <- 120 print ("loading data end, start to boost trees") - bst <- xgb.train(param, xgmat, nrounds, watchlist ); + bst <- xgb.train(param, xgmat, nrounds, watchlist) # save out model xgb.save(bst, "higgs.model") print ('finish training') @@ -49,22 +49,21 @@ for (i in 1:length(threads)){ xgboost.time # [[1]] -# user system elapsed -# 99.015 0.051 98.982 -# +# user system elapsed +# 99.015 0.051 98.982 +# # [[2]] -# user system elapsed -# 100.268 0.317 55.473 -# +# user 
system elapsed +# 100.268 0.317 55.473 +# # [[3]] -# user system elapsed -# 111.682 0.777 35.963 -# +# user system elapsed +# 111.682 0.777 35.963 +# # [[4]] -# user system elapsed -# 149.396 1.851 32.661 -# +# user system elapsed +# 149.396 1.851 32.661 +# # [[5]] -# user system elapsed -# 157.390 5.988 40.949 - +# user system elapsed +# 157.390 5.988 40.949 diff --git a/demo/kaggle-otto/otto_train_pred.R b/demo/kaggle-otto/otto_train_pred.R index c60a123eb..7f54aebcd 100644 --- a/demo/kaggle-otto/otto_train_pred.R +++ b/demo/kaggle-otto/otto_train_pred.R @@ -1,20 +1,20 @@ require(xgboost) require(methods) -train <- read.csv('data/train.csv',header=TRUE,stringsAsFactors = FALSE) -test <- read.csv('data/test.csv',header=TRUE,stringsAsFactors = FALSE) -train <- train[,-1] -test <- test[,-1] +train <- read.csv('data/train.csv', header = TRUE, stringsAsFactors = FALSE) +test <- read.csv('data/test.csv', header = TRUE, stringsAsFactors = FALSE) +train <- train[, -1] +test <- test[, -1] -y <- train[,ncol(train)] -y <- gsub('Class_','',y) -y <- as.integer(y)-1 # xgboost take features in [0,numOfClass) +y <- train[, ncol(train)] +y <- gsub('Class_', '', y) +y <- as.integer(y) - 1 # xgboost take features in [0,numOfClass) -x <- rbind(train[,-ncol(train)],test) +x <- rbind(train[, -ncol(train)], test) x <- as.matrix(x) -x <- matrix(as.numeric(x),nrow(x),ncol(x)) +x <- matrix(as.numeric(x), nrow(x), ncol(x)) trind <- 1:length(y) -teind <- (nrow(train)+1):nrow(x) +teind <- (nrow(train) + 1):nrow(x) # Set necessary parameter param <- list("objective" = "multi:softprob", @@ -24,20 +24,25 @@ param <- list("objective" = "multi:softprob", # Run Cross Validation cv.nrounds <- 50 -bst.cv <- xgb.cv(param=param, data = x[trind,], label = y, - nfold = 3, nrounds=cv.nrounds) +bst.cv <- xgb.cv( + param = param + , data = x[trind, ] + , label = y + , nfold = 3 + , nrounds = cv.nrounds +) # Train the model nrounds <- 50 -bst <- xgboost(param=param, data = x[trind,], label = y, nrounds=nrounds) +bst <- xgboost(param = param, data = x[trind, ], label = y, nrounds = nrounds) # Make prediction -pred <- predict(bst,x[teind,]) -pred <- matrix(pred,9,length(pred)/9) +pred <- predict(bst, x[teind, ]) +pred <- matrix(pred, 9, length(pred) / 9) pred <- t(pred) # Output submission -pred <- format(pred, digits=2,scientific=F) # shrink the size of submission -pred <- data.frame(1:nrow(pred),pred) -names(pred) <- c('id', paste0('Class_',1:9)) -write.csv(pred,file='submission.csv', quote=FALSE,row.names=FALSE) +pred <- format(pred, digits = 2, scientific = FALSE) # shrink the size of submission +pred <- data.frame(1:nrow(pred), pred) +names(pred) <- c('id', paste0('Class_', 1:9)) +write.csv(pred, file = 'submission.csv', quote = FALSE, row.names = FALSE) diff --git a/demo/kaggle-otto/understandingXGBoostModel.Rmd b/demo/kaggle-otto/understandingXGBoostModel.Rmd index ac8898f48..1939fa10c 100644 --- a/demo/kaggle-otto/understandingXGBoostModel.Rmd +++ b/demo/kaggle-otto/understandingXGBoostModel.Rmd @@ -31,7 +31,7 @@ require(methods) require(data.table) require(magrittr) train <- fread('data/train.csv', header = T, stringsAsFactors = FALSE) -test <- fread('data/test.csv', header=TRUE, stringsAsFactors = FALSE) +test <- fread('data/test.csv', header = TRUE, stringsAsFactors = FALSE) ``` > `magrittr` and `data.table` are here to make the code cleaner and much more rapid. @@ -42,13 +42,13 @@ Let's explore the dataset. 
dim(train) # Training content -train[1:6,1:5, with =FALSE] +train[1:6, 1:5, with = FALSE] # Test dataset dimensions dim(test) # Test content -test[1:6,1:5, with =FALSE] +test[1:6, 1:5, with = FALSE] ``` > We only display the 6 first rows and 5 first columns for convenience @@ -87,7 +87,7 @@ For that purpose, we will: ```{r classToIntegers} # Convert from classes to numbers y <- train[, nameLastCol, with = FALSE][[1]] %>% - gsub('Class_','',.) %>% + gsub('Class_', '', .) %>% as.integer %>% subtract(., 1) @@ -98,14 +98,14 @@ y[1:5] We remove label column from training dataset, otherwise **XGBoost** would use it to guess the labels! ```{r deleteCols, results='hide'} -train[, nameLastCol:=NULL, with = FALSE] +train[, nameLastCol := NULL, with = FALSE] ``` `data.table` is an awesome implementation of data.frame, unfortunately it is not a format supported natively by **XGBoost**. We need to convert both datasets (training and test) in `numeric` Matrix format. ```{r convertToNumericMatrix} -trainMatrix <- train[,lapply(.SD,as.numeric)] %>% as.matrix -testMatrix <- test[,lapply(.SD,as.numeric)] %>% as.matrix +trainMatrix <- train[, lapply(.SD, as.numeric)] %>% as.matrix +testMatrix <- test[, lapply(.SD, as.numeric)] %>% as.matrix ``` Model training @@ -127,7 +127,7 @@ param <- list("objective" = "multi:softprob", cv.nrounds <- 5 cv.nfold <- 3 -bst.cv <- xgb.cv(param=param, data = trainMatrix, label = y, +bst.cv <- xgb.cv(param = param, data = trainMatrix, label = y, nfold = cv.nfold, nrounds = cv.nrounds) ``` > As we can see the error rate is low on the test dataset (for a 5mn trained model). @@ -136,7 +136,7 @@ Finally, we are ready to train the real model!!! ```{r modelTraining} nrounds <- 50 -bst <- xgboost(param=param, data = trainMatrix, label = y, nrounds=nrounds) +bst <- xgboost(param = param, data = trainMatrix, label = y, nrounds = nrounds) ``` Model understanding @@ -189,7 +189,7 @@ names <- dimnames(trainMatrix)[[2]] importance_matrix <- xgb.importance(names, model = bst) # Nice graph -xgb.plot.importance(importance_matrix[1:10,]) +xgb.plot.importance(importance_matrix[1:10, ]) ``` > To make it understandable we first extract the column names from the `Matrix`. diff --git a/demo/multiclass_classification/train.R b/demo/multiclass_classification/train.R index a9ded8748..390690bda 100644 --- a/demo/multiclass_classification/train.R +++ b/demo/multiclass_classification/train.R @@ -16,8 +16,8 @@ df[, `:=`(V34 = as.integer(ifelse(V34 == "?", 0L, V34)), idx <- sample(nrow(df), size = round(0.7 * nrow(df)), replace = FALSE) -train <- df[idx,] -test <- df[-idx,] +train <- df[idx, ] +test <- df[-idx, ] train_x <- train[, 1:34] train_y <- train[, V35]
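# ---------------------------------------------------------------------------
# A minimal sketch (not part of the patch above) of how a split like
# `train_x` / `train_y` from the multiclass demo is typically handed to
# xgboost. The label recoding and the eta / max_depth / nrounds values below
# are illustrative assumptions, not values taken from the demo file.
require(xgboost)

labels <- as.integer(factor(train_y)) - 1L   # map class codes onto 0..(num_class - 1)
num_class <- length(unique(labels))
dtrain <- xgb.DMatrix(data = as.matrix(train_x), label = labels)

params <- list(
  objective = "multi:softprob"   # one probability per class and observation
  , num_class = num_class
  , eta = 0.3
  , max_depth = 6
  , nthread = 2
)
bst <- xgb.train(params = params, data = dtrain, nrounds = 50)

# predictions come back as one long vector; reshape to one row per observation
pred <- predict(bst, as.matrix(test[, 1:34]))
pred <- matrix(pred, ncol = num_class, byrow = TRUE)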