Merge pull request #580 from terrytangyuan/test

Fixed most of the lint issues
Yuan (Terry) Tang 2015-10-29 00:54:16 -04:00
commit b9a9cd9db8
15 changed files with 191 additions and 197 deletions
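
Nearly all of the changes below apply two R style conventions: the arrow operator <- rather than = for top-level assignment, and single spaces around infix operators such as ==, !=, +, / and >. A minimal before/after sketch of the two conventions (illustrative only, not code taken from this commit):

    # Before: flagged by the linter.
    option = 0
    if (length(ind)>0) cuts <- floor(length(y)/k)

    # After: lint-clean. Behavior is unchanged, since at the top level
    # `=` and `<-` perform the same assignment.
    option <- 0
    if (length(ind) > 0) cuts <- floor(length(y) / k)

Assuming the lintr package is what the project uses for style checking, the issues left unfixed ("most", not all, were addressed) could be listed by running lintr::lint_package() from the R package root.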


@@ -48,7 +48,7 @@ setMethod("predict", signature = "xgb.Booster",
       stop("predict: ntreelimit must be equal to or greater than 1")
     }
   }
-  option = 0
+  option <- 0
   if (outputmargin) {
     option <- option + 1
   }


@@ -34,8 +34,8 @@ setMethod("slice", signature = "xgb.DMatrix",
   attr_list <- attributes(object)
   nr <- xgb.numrow(object)
   len <- sapply(attr_list,length)
-  ind <- which(len==nr)
-  if (length(ind)>0) {
+  ind <- which(len == nr)
+  if (length(ind) > 0) {
     nms <- names(attr_list)[ind]
     for (i in 1:length(ind)) {
       attr(ret,nms[i]) <- attr(object,nms[i])[idxset]


@@ -1,4 +1,4 @@
 #' @importClassesFrom Matrix dgCMatrix dgeMatrix
 #' @import methods
 # depends on matrix
@@ -15,14 +15,14 @@ xgb.setinfo <- function(dmat, name, info) {
     stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix")
   }
   if (name == "label") {
-    if (length(info)!=xgb.numrow(dmat))
+    if (length(info) != xgb.numrow(dmat))
       stop("The length of labels must equal to the number of rows in the input data")
     .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
           PACKAGE = "xgboost")
     return(TRUE)
   }
   if (name == "weight") {
-    if (length(info)!=xgb.numrow(dmat))
+    if (length(info) != xgb.numrow(dmat))
       stop("The length of weights must equal to the number of rows in the input data")
     .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
           PACKAGE = "xgboost")
@@ -36,7 +36,7 @@ xgb.setinfo <- function(dmat, name, info) {
     return(TRUE)
   }
   if (name == "group") {
-    if (sum(info)!=xgb.numrow(dmat))
+    if (sum(info) != xgb.numrow(dmat))
       stop("The sum of groups must equal to the number of rows in the input data")
     .Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info),
           PACKAGE = "xgboost")
@@ -191,7 +191,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL, prediction = F
   }
   msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
                evnames, PACKAGE = "xgboost")
 } else {
   msg <- paste("[", iter, "]", sep="")
   for (j in 1:length(watchlist)) {
     w <- watchlist[j]
@@ -251,7 +251,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
     # make simple non-stratified folds
     kstep <- length(randidx) %/% nfold
     folds <- list()
-    for (i in 1:(nfold-1)) {
+    for (i in 1:(nfold - 1)) {
       folds[[i]] <- randidx[1:kstep]
       randidx <- setdiff(randidx, folds[[i]])
     }
@@ -261,7 +261,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
   ret <- list()
   for (k in 1:nfold) {
     dtest <- slice(dall, folds[[k]])
-    didx = c()
+    didx <- c()
     for (i in 1:nfold) {
       if (i != k) {
         didx <- append(didx, folds[[i]])
@@ -310,9 +310,9 @@ xgb.createFolds <- function(y, k = 10)
   ## At most, we will use quantiles. If the sample
   ## is too small, we just do regular unstratified
   ## CV
-  cuts <- floor(length(y)/k)
-  if(cuts < 2) cuts <- 2
-  if(cuts > 5) cuts <- 5
+  cuts <- floor(length(y) / k)
+  if (cuts < 2) cuts <- 2
+  if (cuts > 5) cuts <- 5
   y <- cut(y,
            unique(stats::quantile(y, probs = seq(0, 1, length = cuts))),
            include.lowest = TRUE)


@@ -124,15 +124,15 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
     stop("xgb.cv: cannot assign two different objectives")
   if (!is.null(params$objective))
     if (class(params$objective) == 'function') {
-      obj = params$objective
-      params[['objective']] = NULL
+      obj <- params$objective
+      params[['objective']] <- NULL
     }
   # if (!is.null(params$eval_metric) && !is.null(feval))
   #   stop("xgb.cv: cannot assign two different evaluation metrics")
   if (!is.null(params$eval_metric))
-    if (class(params$eval_metric)=='function') {
-      feval = params$eval_metric
-      params[['eval_metric']] = NULL
+    if (class(params$eval_metric) == 'function') {
+      feval <- params$eval_metric
+      params[['eval_metric']] <- NULL
     }
   # Early Stopping
@@ -144,9 +144,9 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
     if (is.null(maximize))
     {
       if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
-        maximize = FALSE
+        maximize <- FALSE
       } else {
-        maximize = TRUE
+        maximize <- TRUE
       }
     }
@@ -158,7 +158,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
     bestInd <- 0
     earlyStopflag <- FALSE
-    if (length(metrics)>1)
+    if (length(metrics) > 1)
       warning('Only the first metric is used for early stopping process.')
   }
@@ -167,16 +167,16 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
   mat_pred <- FALSE
   if (!is.null(obj_type) && obj_type == 'multi:softprob')
   {
-    num_class = params[['num_class']]
+    num_class <- params[['num_class']]
     if (is.null(num_class))
       stop('must set num_class to use softmax')
     predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
-    mat_pred = TRUE
+    mat_pred <- TRUE
   }
   else
     predictValues <- rep(0,xgb.numrow(dtrain))
   history <- c()
-  print.every.n = max(as.integer(print.every.n), 1L)
+  print.every.n <- max(as.integer(print.every.n), 1L)
   for (i in 1:nrounds) {
     msg <- list()
     for (k in 1:nfold) {
@@ -187,26 +187,25 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
     ret <- xgb.cv.aggcv(msg, showsd)
     history <- c(history, ret)
     if(verbose)
-      if (0 == (i-1L)%%print.every.n)
+      if (0 == (i - 1L) %% print.every.n)
         cat(ret, "\n", sep="")
     # early_Stopping
     if (!is.null(early.stop.round)){
-      score <- strsplit(ret,'\\s+')[[1]][1+length(metrics)+2]
+      score <- strsplit(ret,'\\s+')[[1]][1 + length(metrics) + 2]
       score <- strsplit(score,'\\+|:')[[1]][[2]]
       score <- as.numeric(score)
-      if ((maximize && score > bestScore) || (!maximize && score < bestScore)) {
+      if ( (maximize && score > bestScore) || (!maximize && score < bestScore)) {
         bestScore <- score
         bestInd <- i
       } else {
-        if (i-bestInd >= early.stop.round) {
+        if (i - bestInd >= early.stop.round) {
           earlyStopflag <- TRUE
           cat('Stopping. Best iteration:',bestInd)
           break
         }
       }
     }
   }
   if (prediction) {
@@ -226,7 +225,6 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
     }
   }
   colnames <- str_split(string = history[1], pattern = "\t")[[1]] %>% .[2:length(.)] %>% str_extract(".*:") %>% str_replace(":","") %>% str_replace("-", ".")
   colnamesMean <- paste(colnames, "mean")
   if(showsd) colnamesStd <- paste(colnames, "std")
@@ -239,10 +237,10 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
   dt <- utils::read.table(text = "", colClasses = type, col.names = colnames) %>% as.data.table
   split <- str_split(string = history, pattern = "\t")
-  for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist(list(dt, .), use.names = F, fill = F)}
+  for(line in split) dt <- line[2:length(line)] %>% str_extract_all(pattern = "\\d*\\.+\\d*") %>% unlist %>% as.numeric %>% as.list %>% {rbindlist( list( dt, .), use.names = F, fill = F)}
   if (prediction) {
-    return(list(dt = dt,pred = predictValues))
+    return( list( dt = dt,pred = predictValues))
   }
   return(dt)
 }


@@ -66,7 +66,7 @@
 #' xgb.importance(train$data@@Dimnames[[2]], model = bst, data = train$data, label = train$label)
 #'
 #' @export
-xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = NULL, data = NULL, label = NULL, target = function(x) ((x + label) == 2)){
+xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = NULL, data = NULL, label = NULL, target = function(x) ( (x + label) == 2)){
   if (!class(feature_names) %in% c("character", "NULL")) {
     stop("feature_names: Has to be a vector of character or NULL if the model dump already contains feature name. Look at this function documentation to see where to get feature names.")
   }
@@ -79,7 +79,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
     stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
   }
-  if((is.null(data) & !is.null(label)) |(!is.null(data) & is.null(label))) {
+  if((is.null(data) & !is.null(label)) | (!is.null(data) & is.null(label))) {
     stop("data/label: Provide the two arguments if you want co-occurence computation or none of them if you are not interested but not one of them only.")
   }
@@ -110,7 +110,7 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
       d <- data[, result[,Feature], drop=FALSE] < as.numeric(result[,Split])
       apply(c & d, 2, . %>% target %>% sum) -> vec
-      result <- result[, "RealCover":= as.numeric(vec), with = F][, "RealCover %" := RealCover / sum(label)][,MissingNo:=NULL]
+      result <- result[, "RealCover" := as.numeric(vec), with = F][, "RealCover %" := RealCover / sum(label)][,MissingNo := NULL]
     }
   }
   result
@@ -119,13 +119,13 @@ xgb.importance <- function(feature_names = NULL, filename_dump = NULL, model = N
 treeDump <- function(feature_names, text, keepDetail){
   if(keepDetail) groupBy <- c("Feature", "Split", "MissingNo") else groupBy <- "Feature"
-  result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo":= Missing == No ][Feature!="Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = groupBy, with = T][,`:=`(Gain = Gain/sum(Gain), Cover = Cover/sum(Cover), Frequence = Frequence/sum(Frequence))][order(Gain, decreasing = T)]
+  result <- xgb.model.dt.tree(feature_names = feature_names, text = text)[,"MissingNo" := Missing == No ][Feature != "Leaf",.(Gain = sum(Quality), Cover = sum(Cover), Frequence = .N), by = groupBy, with = T][,`:=`(Gain = Gain / sum(Gain), Cover = Cover / sum(Cover), Frequence = Frequence / sum(Frequence))][order(Gain, decreasing = T)]
   result
 }
 linearDump <- function(feature_names, text){
-  which(text == "weight:") %>% {a=.+1;text[a:length(text)]} %>% as.numeric %>% data.table(Feature = feature_names, Weight = .)
+  which(text == "weight:") %>% {a = . + 1; text[a:length(text)]} %>% as.numeric %>% data.table(Feature = feature_names, Weight = .)
 }
 # Avoid error messages during CRAN check.


@@ -81,12 +81,12 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
   }
   if(!is.null(model)){
-    text = xgb.dump(model = model, with.stats = T)
+    text <- xgb.dump(model = model, with.stats = T)
   } else if(!is.null(filename_dump)){
     text <- readLines(filename_dump) %>% str_trim(side = "both")
   }
-  position <- str_match(text, "booster") %>% is.na %>% not %>% which %>% c(length(text)+1)
+  position <- str_match(text, "booster") %>% is.na %>% not %>% which %>% c(length(text) + 1)
   extract <- function(x, pattern) str_extract(x, pattern) %>% str_split("=") %>% lapply(function(x) x[2] %>% as.numeric) %>% unlist
@@ -96,15 +96,15 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
   allTrees <- data.table()
-  anynumber_regex<-"[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
-  for(i in 1:n_round){
-    tree <- text[(position[i]+1):(position[i+1]-1)]
+  anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
+  for (i in 1:n_round){
+    tree <- text[(position[i] + 1):(position[i + 1] - 1)]
     # avoid tree made of a leaf only (no split)
-    if(length(tree) <2) next
-    treeID <- i-1
+    if(length(tree) < 2) next
+    treeID <- i - 1
     notLeaf <- str_match(tree, "leaf") %>% is.na
     leaf <- notLeaf %>% not %>% tree[.]
@@ -128,7 +128,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, filename_dump = NULL, model
     qualityLeaf <- extract(leaf, paste0("leaf=",anynumber_regex))
     coverBranch <- extract(branch, "cover=\\d*\\.*\\d*")
     coverLeaf <- extract(leaf, "cover=\\d*\\.*\\d*")
-    dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree:=treeID]
+    dt <- data.table(ID = c(idBranch, idLeaf), Feature = c(featureBranch, featureLeaf), Split = c(splitBranch, splitLeaf), Yes = c(yesBranch, yesLeaf), No = c(noBranch, noLeaf), Missing = c(missingBranch, missingLeaf), Quality = c(qualityBranch, qualityLeaf), Cover = c(coverBranch, coverLeaf))[order(ID)][,Tree := treeID]
     allTrees <- rbindlist(list(allTrees, dt), use.names = T, fill = F)
   }


@@ -44,9 +44,9 @@ xgb.plot.importance <- function(importance_matrix = NULL, numberOfClusters = c(1
   importance_matrix <- importance_matrix[, .(Gain = sum(Gain)), by = Feature]
   clusters <- suppressWarnings(Ckmeans.1d.dp::Ckmeans.1d.dp(importance_matrix[,Gain], numberOfClusters))
-  importance_matrix[,"Cluster":=clusters$cluster %>% as.character]
-  plot <- ggplot2::ggplot(importance_matrix, ggplot2::aes(x=stats::reorder(Feature, Gain), y = Gain, width= 0.05), environment = environment())+ ggplot2::geom_bar(ggplot2::aes(fill=Cluster), stat="identity", position="identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab("Gain") + ggplot2::ggtitle("Feature importance") + ggplot2::theme(plot.title = ggplot2::element_text(lineheight=.9, face="bold"), panel.grid.major.y = ggplot2::element_blank() )
+  importance_matrix[,"Cluster" := clusters$cluster %>% as.character]
+  plot <- ggplot2::ggplot(importance_matrix, ggplot2::aes(x=stats::reorder(Feature, Gain), y = Gain, width = 0.05), environment = environment()) + ggplot2::geom_bar(ggplot2::aes(fill=Cluster), stat="identity", position="identity") + ggplot2::coord_flip() + ggplot2::xlab("Features") + ggplot2::ylab("Gain") + ggplot2::ggtitle("Feature importance") + ggplot2::theme(plot.title = ggplot2::element_text(lineheight=.9, face="bold"), panel.grid.major.y = ggplot2::element_blank() )
   return(plot)
 }


@@ -74,20 +74,19 @@ xgb.plot.tree <- function(feature_names = NULL, filename_dump = NULL, model = NU
     allTrees <- xgb.model.dt.tree(feature_names = feature_names, model = model, n_first_tree = n_first_tree)
   }
-  allTrees[Feature!="Leaf" ,yesPath:= paste(ID,"(", Feature, "<br/>Cover: ", Cover, "<br/>Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
-  allTrees[Feature!="Leaf" ,noPath:= paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
+  allTrees[Feature != "Leaf" ,yesPath := paste(ID,"(", Feature, "<br/>Cover: ", Cover, "<br/>Gain: ", Quality, ")-->|< ", Split, "|", Yes, ">", Yes.Feature, "]", sep = "")]
+  allTrees[Feature != "Leaf" ,noPath := paste(ID,"(", Feature, ")-->|>= ", Split, "|", No, ">", No.Feature, "]", sep = "")]
   if(is.null(CSSstyle)){
     CSSstyle <- "classDef greenNode fill:#A2EB86, stroke:#04C4AB, stroke-width:2px;classDef redNode fill:#FFA070, stroke:#FF5E5E, stroke-width:2px"
   }
-  yes <- allTrees[Feature!="Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
-  no <- allTrees[Feature!="Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
-  path <- allTrees[Feature!="Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
+  yes <- allTrees[Feature != "Leaf", c(Yes)] %>% paste(collapse = ",") %>% paste("class ", ., " greenNode", sep = "")
+  no <- allTrees[Feature != "Leaf", c(No)] %>% paste(collapse = ",") %>% paste("class ", ., " redNode", sep = "")
+  path <- allTrees[Feature != "Leaf", c(yesPath, noPath)] %>% .[order(.)] %>% paste(sep = "", collapse = ";") %>% paste("graph LR", .,collapse = "", sep = ";") %>% paste(CSSstyle, yes, no, sep = ";")
   DiagrammeR::mermaid(path, width, height)
 }


@@ -140,27 +140,27 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
     warning('watchlist is provided but verbose=0, no evaluation information will be printed')
   }
-  dot.params = list(...)
-  nms.params = names(params)
-  nms.dot.params = names(dot.params)
-  if (length(intersect(nms.params,nms.dot.params))>0)
+  dot.params <- list(...)
+  nms.params <- names(params)
+  nms.dot.params <- names(dot.params)
+  if (length(intersect(nms.params,nms.dot.params)) > 0)
     stop("Duplicated term in parameters. Please check your list of params.")
-  params = append(params, dot.params)
+  params <- append(params, dot.params)
   # customized objective and evaluation metric interface
   if (!is.null(params$objective) && !is.null(obj))
     stop("xgb.train: cannot assign two different objectives")
   if (!is.null(params$objective))
-    if (class(params$objective)=='function') {
-      obj = params$objective
-      params$objective = NULL
+    if (class(params$objective) == 'function') {
+      obj <- params$objective
+      params$objective <- NULL
     }
   if (!is.null(params$eval_metric) && !is.null(feval))
     stop("xgb.train: cannot assign two different evaluation metrics")
   if (!is.null(params$eval_metric))
-    if (class(params$eval_metric)=='function') {
-      feval = params$eval_metric
-      params$eval_metric = NULL
+    if (class(params$eval_metric) == 'function') {
+      feval <- params$eval_metric
+      params$eval_metric <- NULL
     }
   # Early stopping
@@ -174,44 +174,43 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
     if (is.null(maximize))
     {
      if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
-        maximize = FALSE
+        maximize <- FALSE
      } else {
-        maximize = TRUE
+        maximize <- TRUE
      }
     }
     if (maximize) {
-      bestScore = 0
+      bestScore <- 0
     } else {
-      bestScore = Inf
+      bestScore <- Inf
     }
-    bestInd = 0
+    bestInd <- 0
     earlyStopflag = FALSE
-    if (length(watchlist)>1)
+    if (length(watchlist) > 1)
       warning('Only the first data set in watchlist is used for early stopping process.')
   }
   handle <- xgb.Booster(params, append(watchlist, dtrain))
   bst <- xgb.handleToBooster(handle)
-  print.every.n=max( as.integer(print.every.n), 1L)
+  print.every.n <- max( as.integer(print.every.n), 1L)
   for (i in 1:nrounds) {
     succ <- xgb.iter.update(bst$handle, dtrain, i - 1, obj)
     if (length(watchlist) != 0) {
       msg <- xgb.iter.eval(bst$handle, watchlist, i - 1, feval)
-      if (0== ( (i-1) %% print.every.n))
-        cat(paste(msg, "\n", sep=""))
+      if (0 == ( (i - 1) %% print.every.n))
+        cat(paste(msg, "\n", sep = ""))
       if (!is.null(early.stop.round))
       {
-        score = strsplit(msg,':|\\s+')[[1]][3]
-        score = as.numeric(score)
-        if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
-          bestScore = score
-          bestInd = i
+        score <- strsplit(msg,':|\\s+')[[1]][3]
+        score <- as.numeric(score)
+        if ( (maximize && score > bestScore) || (!maximize && score < bestScore)) {
+          bestScore <- score
+          bestInd <- i
         } else {
-          if (i-bestInd>=early.stop.round) {
-            earlyStopflag = TRUE
+          earlyStopflag = TRUE
+          if (i - bestInd >= early.stop.round) {
            cat('Stopping. Best iteration:',bestInd)
            break
          }
@@ -226,8 +225,8 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
   }
   bst <- xgb.Booster.check(bst)
   if (!is.null(early.stop.round)) {
-    bst$bestScore = bestScore
-    bst$bestInd = bestInd
+    bst$bestScore <- bestScore
+    bst$bestInd <- bestInd
   }
   return(bst)
 }


@@ -79,8 +79,6 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
   return(bst)
 }
 #' Training part from Mushroom Data Set
 #'
 #' This data set is originally from the Mushroom data set,


@@ -4,30 +4,30 @@ context("basic functions")
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
-train = agaricus.train
-test = agaricus.test
+train <- agaricus.train
+test <- agaricus.test
 test_that("train and predict", {
-  bst = xgboost(data = train$data, label = train$label, max.depth = 2,
+  bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                 eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
-  pred = predict(bst, test$data)
+  pred <- predict(bst, test$data)
 })
 test_that("early stopping", {
-  res = xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5,
+  res <- xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5,
                eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
                early.stop.round = 3, maximize = FALSE)
-  expect_true(nrow(res)<20)
-  bst = xgboost(data = train$data, label = train$label, max.depth = 2,
+  expect_true(nrow(res) < 20)
+  bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                 eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
                 early.stop.round = 3, maximize = FALSE)
-  pred = predict(bst, test$data)
+  pred <- predict(bst, test$data)
 })
 test_that("save_period", {
-  bst = xgboost(data = train$data, label = train$label, max.depth = 2,
+  bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
                 eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
                 save_period = 10, save_name = "xgb.model")
-  pred = predict(bst, test$data)
+  pred <- predict(bst, test$data)
 })


@@ -13,14 +13,14 @@ test_that("custom objective works", {
   logregobj <- function(preds, dtrain) {
     labels <- getinfo(dtrain, "label")
-    preds <- 1/(1 + exp(-preds))
+    preds <- 1 / (1 + exp(-preds))
     grad <- preds - labels
     hess <- preds * (1 - preds)
     return(list(grad = grad, hess = hess))
   }
   evalerror <- function(preds, dtrain) {
     labels <- getinfo(dtrain, "label")
-    err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
+    err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
     return(list(metric = "error", value = err))
   }
@@ -34,13 +34,13 @@ test_that("custom objective works", {
   logregobjattr <- function(preds, dtrain) {
     labels <- attr(dtrain, 'label')
-    preds <- 1/(1 + exp(-preds))
+    preds <- 1 / (1 + exp(-preds))
     grad <- preds - labels
     hess <- preds * (1 - preds)
     return(list(grad = grad, hess = hess))
   }
-  param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
-                objective=logregobjattr, eval_metric=evalerror)
+  param <- list(max.depth=2, eta=1, nthread = 2, silent = 1,
+                objective = logregobjattr, eval_metric = evalerror)
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
   expect_equal(length(bst$raw), 1064)


@@ -8,11 +8,11 @@ require(vcd)
 data(Arthritis)
 data(agaricus.train, package='xgboost')
 df <- data.table(Arthritis, keep.rownames = F)
-df[,AgeDiscret:= as.factor(round(Age/10,0))]
-df[,AgeCat:= as.factor(ifelse(Age > 30, "Old", "Young"))]
-df[,ID:=NULL]
-sparse_matrix = sparse.model.matrix(Improved~.-1, data = df)
-output_vector = df[,Y:=0][Improved == "Marked",Y:=1][,Y]
+df[,AgeDiscret := as.factor(round(Age / 10,0))]
+df[,AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
+df[,ID := NULL]
+sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
+output_vector <- df[,Y := 0][Improved == "Marked",Y := 1][,Y]
 bst <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9,
                eta = 1, nthread = 2, nround = 10,objective = "binary:logistic")


@@ -4,10 +4,10 @@ require(xgboost)
 test_that("poisson regression works", {
   data(mtcars)
-  bst = xgboost(data=as.matrix(mtcars[,-11]),label=mtcars[,11],
-              objective='count:poisson',nrounds=5)
+  bst <- xgboost(data = as.matrix(mtcars[,-11]),label = mtcars[,11],
+               objective = 'count:poisson', nrounds=5)
   expect_equal(class(bst), "xgb.Booster")
-  pred = predict(bst,as.matrix(mtcars[,-11]))
+  pred <- predict(bst,as.matrix(mtcars[, -11]))
   expect_equal(length(pred), 32)
-  sqrt(mean((pred-mtcars[,11])^2))
+  sqrt(mean( (pred - mtcars[,11]) ^ 2))
 })