Add Github Action for R. (#5911)

* Fix lintr errors.
2020-07-20 19:23:36 +08:00 · 2020-07-20 19:23:36 +08:00 · 8b1afce316
commit 8b1afce316
parent b3d2e7644a
33 changed files with 589 additions and 544 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -0,0 +1,52 @@
 # This is a basic workflow to help you get started with Actions
 name: XGoost-CI
 # Controls when the action will run. Triggers the workflow on push or pull request
 # events but only for the master branch
 on: [push, pull_request]
 # A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
  test-with-R:
    runs-on: ${{ matrix.config.os }}
    name: Test R on OS ${{ matrix.config.os }}, R (${{ matrix.config.r }})
    strategy:
      fail-fast: false
      matrix:
        config:
          - {os: windows-latest, r: 'release'}
    env:
      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
      RSPM: ${{ matrix.config.rspm }}
    steps:
    - uses: actions/checkout@v2
      with:
        submodules: 'true'
    - uses: r-lib/actions/setup-r@master
      with:
        r-version: ${{ matrix.config.r }}
    - name: Install dependencies
      shell: Rscript {0}
      run: |
        install.packages(c('XML','igraph'))
        install.packages(c('data.table','magrittr','stringi','ggplot2','DiagrammeR','Ckmeans.1d.dp','vcd','testthat','lintr','knitr','rmarkdown'))
    - name: Config R
      run: |
        mkdir build && cd build
        cmake .. -DCMAKE_CONFIGURATION_TYPES="Release" -DR_LIB=ON
    - name: Build R
      run: |
        cmake --build build --target install --config Release
    - name: Test R
      run: |
        cd R-package
        R.exe -q -e "library(testthat); setwd('tests'); source('testthat.R')"
--- a/R-package/R/callbacks.R
+++ b/R-package/R/callbacks.R
@ -351,13 +351,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
  finalizer <- function(env) {
    if (!is.null(env$bst)) {
-      attr_best_score = as.numeric(xgb.attr(env$bst$handle, 'best_score'))
+      attr_best_score <- as.numeric(xgb.attr(env$bst$handle, 'best_score'))
      if (best_score != attr_best_score)
        stop("Inconsistent 'best_score' values between the closure state: ", best_score,
             " and the xgb.attr: ", attr_best_score)
-      env$bst$best_iteration = best_iteration
+      env$bst$best_iteration <- best_iteration
-      env$bst$best_ntreelimit = best_ntreelimit
+      env$bst$best_ntreelimit <- best_ntreelimit
-      env$bst$best_score = best_score
+      env$bst$best_score <- best_score
    } else {
      env$basket$best_iteration <- best_iteration
      env$basket$best_ntreelimit <- best_ntreelimit
@ -372,7 +372,7 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
      return(finalizer(env))
    i <- env$iteration
-    score = env$bst_evaluation[metric_idx]
+    score <- env$bst_evaluation[metric_idx]
    if ((maximize && score > best_score) ||
        (!maximize && score < best_score)) {
@ -613,9 +613,7 @@ cb.gblinear.history <- function(sparse=FALSE) {
  init <- function(env) {
    if (!is.null(env$bst)) { # xgb.train:
      coef_path <- list()
    } else if (!is.null(env$bst_folds)) { # xgb.cv:
      coef_path <- rep(list(), length(env$bst_folds))
    } else stop("Parent frame has neither 'bst' nor 'bst_folds'")
  }
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@ -69,9 +69,9 @@ check.booster.params <- function(params, ...) {
  if (!is.null(params[['monotone_constraints']]) &&
      typeof(params[['monotone_constraints']]) != "character") {
-        vec2str = paste(params[['monotone_constraints']], collapse = ',')
+        vec2str <- paste(params[['monotone_constraints']], collapse = ',')
-        vec2str = paste0('(', vec2str, ')')
+        vec2str <- paste0('(', vec2str, ')')
-        params[['monotone_constraints']] = vec2str
+        params[['monotone_constraints']] <- vec2str
  }
  # interaction constraints parser (convert from list of column indices to string)
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@ -1,6 +1,7 @@
 # Construct an internal xgboost Booster and return a handle to it.
 # internal utility function
-xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
+xgb.Booster.handle <- function(params = list(), cachelist = list(),
                               modelfile = NULL) {
  if (typeof(cachelist) != "list" ||
      !all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) {
    stop("cachelist must be a list of xgb.DMatrix objects")
@ -62,8 +63,8 @@ is.null.handle <- function(handle) {
  return(FALSE)
 }
-# Return a verified to be valid handle out of either xgb.Booster.handle or xgb.Booster
+# Return a verified to be valid handle out of either xgb.Booster.handle or
-# internal utility function
+# xgb.Booster internal utility function
 xgb.get.handle <- function(object) {
  if (inherits(object, "xgb.Booster")) {
    handle <- object$handle
--- a/R-package/R/xgb.create.features.R
+++ b/R-package/R/xgb.create.features.R
@ -83,5 +83,5 @@ xgb.create.features <- function(model, data, ...){
  check.deprecation(...)
  pred_with_leaf <- predict(model, data, predleaf = TRUE)
  cols <- lapply(as.data.frame(pred_with_leaf), factor)
-  cbind(data, sparse.model.matrix( ~ . -1, cols))
+  cbind(data, sparse.model.matrix(~ . -1, cols)) # nolint
 }
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@ -143,9 +143,9 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  } else if (inherits(data, 'xgb.DMatrix')) {
    if (!is.null(label))
      warning("xgb.cv: label will be ignored, since data is of type xgb.DMatrix")
-    cv_label = getinfo(data, 'label')
+    cv_label <- getinfo(data, 'label')
  } else {
-    cv_label = label
+    cv_label <- label
  }
  # CV folds
@ -208,8 +208,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  basket <- list()
  # extract parameters that can affect the relationship b/w #trees and #iterations
-  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1)
+  num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
-  num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1)
+  num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
  # those are fixed for CV (no training continuation)
  begin_iteration <- 1
@ -226,7 +226,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
    })
    msg <- simplify2array(msg)
    bst_evaluation <- rowMeans(msg)
-    bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2)
+    bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2) # nolint
    for (f in cb$post_iter) f()
--- a/R-package/R/xgb.ggplot.R
+++ b/R-package/R/xgb.ggplot.R
@ -105,7 +105,7 @@ xgb.ggplot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med
 # internal utility function
 multiplot <- function(..., cols = 1) {
  plots <- list(...)
-  num_plots = length(plots)
+  num_plots <- length(plots)
  layout <- matrix(seq(1, cols * ceiling(num_plots / cols)),
                   ncol = cols, nrow = ceiling(num_plots / cols))
--- a/R-package/R/xgb.importance.R
+++ b/R-package/R/xgb.importance.R
@ -117,8 +117,7 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
                 Weight = weights,
                 Class = seq_len(num_class) - 1)[order(Class, -abs(Weight))]
    }
-  } else { 
+  } else { # tree model
  # tree model
      result <- xgb.model.dt.tree(feature_names = feature_names,
                                  text = model_text_dump,
                                  trees = trees)[
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@ -331,9 +331,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
  if (is_update && nrounds > niter_init)
    stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
  # TODO: distributed code
  rank <- 0
  niter_skip <- ifelse(is_update, 0, niter_init)
  begin_iteration <- niter_skip + 1
  end_iteration <- niter_skip + nrounds
@ -345,7 +342,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
    xgb.iter.update(bst$handle, dtrain, iteration - 1, obj)
    bst_evaluation <- numeric(0)
    if (length(watchlist) > 0)
      bst_evaluation <- xgb.iter.eval(bst$handle, watchlist, iteration - 1, feval)
@ -360,7 +356,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
  bst <- xgb.Booster.complete(bst, saveraw = TRUE)
  # store the total number of boosting iterations
-  bst$niter = end_iteration
+  bst$niter <- end_iteration
  # store the evaluation results
  if (length(evaluation_log) > 0 &&
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@ -9,12 +9,12 @@ test <- agaricus.test
 set.seed(1994)
 # disable some tests for Win32
-windows_flag = .Platform$OS.type == "windows" &&
+windows_flag <- .Platform$OS.type == "windows" &&
               .Machine$sizeof.pointer != 8
-solaris_flag = (Sys.info()['sysname'] == "SunOS")
+solaris_flag <- (Sys.info()['sysname'] == "SunOS")
 test_that("train and predict binary classification", {
-  nrounds = 2
+  nrounds <- 2
  expect_output(
    bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
                  eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic")
@ -37,7 +37,7 @@ test_that("train and predict binary classification", {
 test_that("parameter validation works", {
  p <- list(foo = "bar")
-  nrounds = 1
+  nrounds <- 1
  set.seed(1994)
  d <- cbind(
@ -70,7 +70,7 @@ test_that("parameter validation works", {
 test_that("dart prediction works", {
-  nrounds = 32
+  nrounds <- 32
  set.seed(1994)
  d <- cbind(
@ -223,7 +223,7 @@ test_that("use of multiple eval metrics works", {
 test_that("training continuation works", {
  dtrain <- xgb.DMatrix(train$data, label = train$label)
-  watchlist = list(train=dtrain)
+  watchlist <- list(train = dtrain)
  param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
  # for the reference, use 4 iterations at once:
@ -255,7 +255,7 @@ test_that("training continuation works", {
 test_that("model serialization works", {
  out_path <- "model_serialization"
  dtrain <- xgb.DMatrix(train$data, label = train$label)
-  watchlist = list(train=dtrain)
+  watchlist <- list(train = dtrain)
  param <- list(objective = "binary:logistic")
  booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
  raw <- xgb.serialize(booster)
@ -338,7 +338,7 @@ test_that("max_delta_step works", {
  dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
  watchlist <- list(train = dtrain)
  param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
-  nrounds = 5
+  nrounds <- 5
  # model with no restriction on max_delta_step
  bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
  # model with restricted max_delta_step
--- a/R-package/tests/testthat/test_callbacks.R
+++ b/R-package/tests/testthat/test_callbacks.R
@ -21,7 +21,7 @@ ltrain <- add.noise(train$label, 0.2)
 ltest <- add.noise(test$label, 0.2)
 dtrain <- xgb.DMatrix(train$data, label = ltrain)
 dtest <- xgb.DMatrix(test$data, label = ltest)
-watchlist = list(train=dtrain, test=dtest)
+watchlist <- list(train = dtrain, test = dtest)
 err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
@ -267,7 +267,7 @@ test_that("early stopping xgb.cv works", {
 test_that("prediction in xgb.cv works", {
  set.seed(11)
-  nrounds = 4
+  nrounds <- 4
  cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0)
  expect_false(is.null(cv$evaluation_log))
  expect_false(is.null(cv$pred))
--- a/R-package/tests/testthat/test_custom_objective.R
+++ b/R-package/tests/testthat/test_custom_objective.R
@ -54,14 +54,14 @@ test_that("custom objective using DMatrix attr works", {
    hess <- preds * (1 - preds)
    return(list(grad = grad, hess = hess))
  }
-  param$objective = logregobjattr
+  param$objective <- logregobjattr
  bst <- xgb.train(param, dtrain, num_round, watchlist)
  expect_equal(class(bst), "xgb.Booster")
 })
 test_that("custom objective with multi-class works", {
-  data = as.matrix(iris[, -5])
+  data <- as.matrix(iris[, -5])
-  label =  as.numeric(iris$Species) - 1
+  label <-  as.numeric(iris$Species) - 1
  dtrain <- xgb.DMatrix(data = data, label = label)
  nclasses <- 3
@ -72,6 +72,6 @@ test_that("custom objective with multi-class works", {
    hess <- rnorm(dim(as.matrix(preds))[1])
    return (list(grad = grad, hess = hess))
  }
-  param$objective = fake_softprob
+  param$objective <- fake_softprob
  bst <- xgb.train(param, dtrain, 1, num_class = nclasses)
 })
--- a/R-package/tests/testthat/test_glm.R
+++ b/R-package/tests/testthat/test_glm.R
@ -16,7 +16,7 @@ test_that("gblinear works", {
  ERR_UL <- 0.005 # upper limit for the test set error
  VERB <- 0      # chatterbox switch
-  param$updater = 'shotgun'
+  param$updater <- 'shotgun'
  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
  ypred <- predict(bst, dtest)
  expect_equal(length(getinfo(dtest, 'label')), 1611)
@ -29,7 +29,7 @@ test_that("gblinear works", {
  expect_equal(dim(h), c(n, ncol(dtrain) + 1))
  expect_is(h, "matrix")
-  param$updater = 'coord_descent'
+  param$updater <- 'coord_descent'
  bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
  expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@ -5,10 +5,10 @@ require(data.table)
 require(Matrix)
 require(vcd, quietly = TRUE)
-float_tolerance = 5e-6
+float_tolerance <- 5e-6
 # disable some tests for 32-bit environment
-flag_32bit = .Machine$sizeof.pointer != 8
+flag_32bit <- .Machine$sizeof.pointer != 8
 set.seed(1982)
 data(Arthritis)
@ -16,7 +16,7 @@ df <- data.table(Arthritis, keep.rownames = FALSE)
 df[, AgeDiscret := as.factor(round(Age / 10, 0))]
 df[, AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
 df[, ID := NULL]
-sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
+sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df) # nolint
 label <- df[, ifelse(Improved == "Marked", 1, 0)]
 # binary
@ -46,7 +46,7 @@ mbst.GLM <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0,
 test_that("xgb.dump works", {
  if (!flag_32bit)
    expect_length(xgb.dump(bst.Tree), 200)
-  dump_file = file.path(tempdir(), 'xgb.model.dump')
+  dump_file <- file.path(tempdir(), 'xgb.model.dump')
  expect_true(xgb.dump(bst.Tree, dump_file, with_stats = TRUE))
  expect_true(file.exists(dump_file))
  expect_gt(file.size(dump_file), 8000)
@ -170,7 +170,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
    pred <- pr()
    shap <- pr(predcontrib = TRUE)
    shapi <- pr(predinteraction = TRUE)
-    tol = 1e-5
+    tol <- 1e-5
    expect_equal(rowSums(shap), pred, tol = tol)
    expect_equal(apply(shapi, 1, sum), pred, tol = tol)
--- a/R-package/tests/testthat/test_interactions.R
+++ b/R-package/tests/testthat/test_interactions.R
@ -26,7 +26,7 @@ test_that("predict feature interactions works", {
  param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
  b <- xgb.train(param, dm, 100)
-  pred = predict(b, dm, outputmargin=TRUE)
+  pred <- predict(b, dm, outputmargin = TRUE)
  # SHAP contributions:
  cont <- predict(b, dm, predcontrib = TRUE)
@ -73,9 +73,9 @@ test_that("predict feature interactions works", {
  gt_intr[, 2, 3] <- 1. * X[, 2] * X[, 3] # attribute a HALF of the interaction term to each symmetric element
  gt_intr[, 3, 2] <- gt_intr[, 2, 3]
  # merge-in the diagonal based on 'ground truth' feature contributions
-  intr_diag = gt_cont - apply(gt_intr, c(1,2), sum)
+  intr_diag <- gt_cont - apply(gt_intr, c(1, 2), sum)
  for (j in seq_len(P)) {
-    gt_intr[,j,j] = intr_diag[,j]
+    gt_intr[, j, j] <- intr_diag[, j]
  }
  # These should be relatively close:
  expect_lt(max(abs(intr - gt_intr)), 0.1)
@ -119,7 +119,7 @@ test_that("multiclass feature interactions work", {
  dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
  param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
  b <- xgb.train(param, dm, 40)
-  pred = predict(b, dm, outputmargin=TRUE) %>% array(c(3, 150)) %>% t
+  pred <- predict(b, dm, outputmargin = TRUE) %>% array(c(3, 150)) %>% t
  # SHAP contributions:
  cont <- predict(b, dm, predcontrib = TRUE)
--- a/R-package/tests/testthat/test_monotone.R
+++ b/R-package/tests/testthat/test_monotone.R
@ -3,22 +3,21 @@ require(xgboost)
 context("monotone constraints")
 set.seed(1024)
-x = rnorm(1000, 10)
+x <- rnorm(1000, 10)
-y = -1*x + rnorm(1000, 0.001) + 3*sin(x)
+y <- -1 * x + rnorm(1000, 0.001) + 3 * sin(x)
-train = matrix(x, ncol = 1)
+train <- matrix(x, ncol = 1)
 test_that("monotone constraints for regression", {
-  bst = xgboost(data = train, label = y, max_depth = 2,
+    bst <- xgboost(data = train, label = y, max_depth = 2,
                   eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
                   monotone_constraints = -1)
-  pred = predict(bst, train)
+    pred <- predict(bst, train)
-  ind = order(train[,1])
+    ind <- order(train[, 1])
-  pred.ord = pred[ind]
+    pred.ord <- pred[ind]
    expect_true({
        !any(diff(pred.ord) > 0)
    }, "Monotone Contraint Satisfied")
 })
--- a/R-package/tests/testthat/test_update.R
+++ b/R-package/tests/testthat/test_update.R
@ -9,10 +9,10 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 # Disable flaky tests for 32-bit Windows.
 # See https://github.com/dmlc/xgboost/issues/3720
-win32_flag = .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
+win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
 test_that("updating the model works", {
-  watchlist = list(train = dtrain, test = dtest)
+  watchlist <- list(train = dtrain, test = dtest)
  # no-subsampling
  p1 <- list(objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2)