Add GitHub Action for R. (#5911)

* Fix lintr errors.
Jiaming Yuan 2020-07-20 19:23:36 +08:00 committed by GitHub
parent b3d2e7644a
commit 8b1afce316
33 changed files with 589 additions and 544 deletions

.github/workflows/main.yml (new file, 52 lines)

@@ -0,0 +1,52 @@
# This is a basic workflow to help you get started with Actions
name: XGBoost-CI
# Controls when the action will run. Triggers the workflow on every push and
# pull request event.
on: [push, pull_request]
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
test-with-R:
runs-on: ${{ matrix.config.os }}
name: Test R on OS ${{ matrix.config.os }}, R (${{ matrix.config.r }})
strategy:
fail-fast: false
matrix:
config:
- {os: windows-latest, r: 'release'}
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
steps:
- uses: actions/checkout@v2
with:
submodules: 'true'
- uses: r-lib/actions/setup-r@master
with:
r-version: ${{ matrix.config.r }}
- name: Install dependencies
shell: Rscript {0}
run: |
install.packages(c('XML','igraph'))
install.packages(c('data.table','magrittr','stringi','ggplot2','DiagrammeR','Ckmeans.1d.dp','vcd','testthat','lintr','knitr','rmarkdown'))
- name: Config R
run: |
mkdir build && cd build
cmake .. -DCMAKE_CONFIGURATION_TYPES="Release" -DR_LIB=ON
- name: Build R
run: |
cmake --build build --target install --config Release
- name: Test R
run: |
cd R-package
R.exe -q -e "library(testthat); setwd('tests'); source('testthat.R')"

@@ -351,13 +351,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
finalizer <- function(env) {
if (!is.null(env$bst)) {
attr_best_score = as.numeric(xgb.attr(env$bst$handle, 'best_score'))
attr_best_score <- as.numeric(xgb.attr(env$bst$handle, 'best_score'))
if (best_score != attr_best_score)
stop("Inconsistent 'best_score' values between the closure state: ", best_score,
" and the xgb.attr: ", attr_best_score)
env$bst$best_iteration = best_iteration
env$bst$best_ntreelimit = best_ntreelimit
env$bst$best_score = best_score
env$bst$best_iteration <- best_iteration
env$bst$best_ntreelimit <- best_ntreelimit
env$bst$best_score <- best_score
} else {
env$basket$best_iteration <- best_iteration
env$basket$best_ntreelimit <- best_ntreelimit
@@ -372,7 +372,7 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
return(finalizer(env))
i <- env$iteration
score = env$bst_evaluation[metric_idx]
score <- env$bst_evaluation[metric_idx]
if ((maximize && score > best_score) ||
(!maximize && score < best_score)) {
@@ -613,9 +613,7 @@ cb.gblinear.history <- function(sparse=FALSE) {
init <- function(env) {
if (!is.null(env$bst)) { # xgb.train:
coef_path <- list()
} else if (!is.null(env$bst_folds)) { # xgb.cv:
coef_path <- rep(list(), length(env$bst_folds))
} else stop("Parent frame has neither 'bst' nor 'bst_folds'")
}
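
Most of the R changes in this commit follow the same lintr rule: `=` used for assignment is replaced with `<-`, as in the callback hunks above. A minimal sketch of checking a touched file locally with the lintr package (the path is illustrative, since file names are not shown in this view; the default linter set includes the assignment check):

    # list style violations in one file, including `=` used as an assignment operator
    library(lintr)
    lint("R-package/R/callbacks.R")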

@@ -69,9 +69,9 @@ check.booster.params <- function(params, ...) {
if (!is.null(params[['monotone_constraints']]) &&
typeof(params[['monotone_constraints']]) != "character") {
vec2str = paste(params[['monotone_constraints']], collapse = ',')
vec2str = paste0('(', vec2str, ')')
params[['monotone_constraints']] = vec2str
vec2str <- paste(params[['monotone_constraints']], collapse = ',')
vec2str <- paste0('(', vec2str, ')')
params[['monotone_constraints']] <- vec2str
}
# interaction constraints parser (convert from list of column indices to string)

@@ -1,6 +1,7 @@
# Construct an internal xgboost Booster and return a handle to it.
# internal utility function
xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
xgb.Booster.handle <- function(params = list(), cachelist = list(),
modelfile = NULL) {
if (typeof(cachelist) != "list" ||
!all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) {
stop("cachelist must be a list of xgb.DMatrix objects")
@@ -62,8 +63,8 @@ is.null.handle <- function(handle) {
return(FALSE)
}
# Return a verified to be valid handle out of either xgb.Booster.handle or xgb.Booster
# internal utility function
# Return a verified to be valid handle out of either xgb.Booster.handle or
# xgb.Booster internal utility function
xgb.get.handle <- function(object) {
if (inherits(object, "xgb.Booster")) {
handle <- object$handle
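
The signature above is only re-wrapped, presumably to keep the line within lintr's default 80-character limit; splitting a definition after a comma does not change its behaviour. A generic illustration with a hypothetical function f:

    # identical definitions apart from layout; the wrapped form stays under 80 characters per line
    f <- function(params = list(), cachelist = list(), modelfile = NULL) NULL
    f <- function(params = list(), cachelist = list(),
                  modelfile = NULL) NULL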

@@ -83,5 +83,5 @@ xgb.create.features <- function(model, data, ...){
check.deprecation(...)
pred_with_leaf <- predict(model, data, predleaf = TRUE)
cols <- lapply(as.data.frame(pred_with_leaf), factor)
cbind(data, sparse.model.matrix( ~ . -1, cols))
cbind(data, sparse.model.matrix(~ . -1, cols)) # nolint
}
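
The trailing `# nolint` marker added above tells lintr to skip that single line, so formula code the linters would otherwise flag can stay as written. A hypothetical illustration (object names are made up):

    # the trailing marker suppresses all lintr checks on this one line
    X <- sparse.model.matrix(~ . - 1, data = my_factor_frame)  # nolint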

@@ -143,9 +143,9 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
} else if (inherits(data, 'xgb.DMatrix')) {
if (!is.null(label))
warning("xgb.cv: label will be ignored, since data is of type xgb.DMatrix")
cv_label = getinfo(data, 'label')
cv_label <- getinfo(data, 'label')
} else {
cv_label = label
cv_label <- label
}
# CV folds
@@ -208,8 +208,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
basket <- list()
# extract parameters that can affect the relationship b/w #trees and #iterations
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1)
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1)
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
# those are fixed for CV (no training continuation)
begin_iteration <- 1
@@ -226,7 +226,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
})
msg <- simplify2array(msg)
bst_evaluation <- rowMeans(msg)
bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2)
bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2) # nolint
for (f in cb$post_iter) f()

@@ -105,7 +105,7 @@ xgb.ggplot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med
# internal utility function
multiplot <- function(..., cols = 1) {
plots <- list(...)
num_plots = length(plots)
num_plots <- length(plots)
layout <- matrix(seq(1, cols * ceiling(num_plots / cols)),
ncol = cols, nrow = ceiling(num_plots / cols))

@@ -117,8 +117,7 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
Weight = weights,
Class = seq_len(num_class) - 1)[order(Class, -abs(Weight))]
}
} else {
# tree model
} else { # tree model
result <- xgb.model.dt.tree(feature_names = feature_names,
text = model_text_dump,
trees = trees)[

@@ -331,9 +331,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
if (is_update && nrounds > niter_init)
stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
# TODO: distributed code
rank <- 0
niter_skip <- ifelse(is_update, 0, niter_init)
begin_iteration <- niter_skip + 1
end_iteration <- niter_skip + nrounds
@@ -345,7 +342,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
xgb.iter.update(bst$handle, dtrain, iteration - 1, obj)
bst_evaluation <- numeric(0)
if (length(watchlist) > 0)
bst_evaluation <- xgb.iter.eval(bst$handle, watchlist, iteration - 1, feval)
@@ -360,7 +356,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
bst <- xgb.Booster.complete(bst, saveraw = TRUE)
# store the total number of boosting iterations
bst$niter = end_iteration
bst$niter <- end_iteration
# store the evaluation results
if (length(evaluation_log) > 0 &&

@@ -9,12 +9,12 @@ test <- agaricus.test
set.seed(1994)
# disable some tests for Win32
windows_flag = .Platform$OS.type == "windows" &&
windows_flag <- .Platform$OS.type == "windows" &&
.Machine$sizeof.pointer != 8
solaris_flag = (Sys.info()['sysname'] == "SunOS")
solaris_flag <- (Sys.info()['sysname'] == "SunOS")
test_that("train and predict binary classification", {
nrounds = 2
nrounds <- 2
expect_output(
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic")
@@ -37,7 +37,7 @@ test_that("train and predict binary classification", {
test_that("parameter validation works", {
p <- list(foo = "bar")
nrounds = 1
nrounds <- 1
set.seed(1994)
d <- cbind(
@@ -70,7 +70,7 @@ test_that("parameter validation works", {
test_that("dart prediction works", {
nrounds = 32
nrounds <- 32
set.seed(1994)
d <- cbind(
@@ -223,7 +223,7 @@ test_that("use of multiple eval metrics works", {
test_that("training continuation works", {
dtrain <- xgb.DMatrix(train$data, label = train$label)
watchlist = list(train=dtrain)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
# for the reference, use 4 iterations at once:
@@ -255,7 +255,7 @@ test_that("training continuation works", {
test_that("model serialization works", {
out_path <- "model_serialization"
dtrain <- xgb.DMatrix(train$data, label = train$label)
watchlist = list(train=dtrain)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic")
booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
raw <- xgb.serialize(booster)
@@ -338,7 +338,7 @@ test_that("max_delta_step works", {
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
nrounds = 5
nrounds <- 5
# model with no restriction on max_delta_step
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
# model with restricted max_delta_step
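
Besides swapping `=` for `<-`, the test changes above also add spaces around `=` inside argument lists, the other spacing issue the lint step flags. Side by side:

    watchlist = list(train=dtrain)     # flagged: `=` assignment and no spaces around `=`
    watchlist <- list(train = dtrain)  # style used after this commit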

@@ -21,7 +21,7 @@ ltrain <- add.noise(train$label, 0.2)
ltest <- add.noise(test$label, 0.2)
dtrain <- xgb.DMatrix(train$data, label = ltrain)
dtest <- xgb.DMatrix(test$data, label = ltest)
watchlist = list(train=dtrain, test=dtest)
watchlist <- list(train = dtrain, test = dtest)
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
@@ -267,7 +267,7 @@ test_that("early stopping xgb.cv works", {
test_that("prediction in xgb.cv works", {
set.seed(11)
nrounds = 4
nrounds <- 4
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0)
expect_false(is.null(cv$evaluation_log))
expect_false(is.null(cv$pred))

@@ -54,14 +54,14 @@ test_that("custom objective using DMatrix attr works", {
hess <- preds * (1 - preds)
return(list(grad = grad, hess = hess))
}
param$objective = logregobjattr
param$objective <- logregobjattr
bst <- xgb.train(param, dtrain, num_round, watchlist)
expect_equal(class(bst), "xgb.Booster")
})
test_that("custom objective with multi-class works", {
data = as.matrix(iris[, -5])
label = as.numeric(iris$Species) - 1
data <- as.matrix(iris[, -5])
label <- as.numeric(iris$Species) - 1
dtrain <- xgb.DMatrix(data = data, label = label)
nclasses <- 3
@@ -72,6 +72,6 @@ test_that("custom objective with multi-class works", {
hess <- rnorm(dim(as.matrix(preds))[1])
return (list(grad = grad, hess = hess))
}
param$objective = fake_softprob
param$objective <- fake_softprob
bst <- xgb.train(param, dtrain, 1, num_class = nclasses)
})

@@ -16,7 +16,7 @@ test_that("gblinear works", {
ERR_UL <- 0.005 # upper limit for the test set error
VERB <- 0 # chatterbox switch
param$updater = 'shotgun'
param$updater <- 'shotgun'
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
ypred <- predict(bst, dtest)
expect_equal(length(getinfo(dtest, 'label')), 1611)
@@ -29,7 +29,7 @@ test_that("gblinear works", {
expect_equal(dim(h), c(n, ncol(dtrain) + 1))
expect_is(h, "matrix")
param$updater = 'coord_descent'
param$updater <- 'coord_descent'
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)

@@ -5,10 +5,10 @@ require(data.table)
require(Matrix)
require(vcd, quietly = TRUE)
float_tolerance = 5e-6
float_tolerance <- 5e-6
# disable some tests for 32-bit environment
flag_32bit = .Machine$sizeof.pointer != 8
flag_32bit <- .Machine$sizeof.pointer != 8
set.seed(1982)
data(Arthritis)
@@ -16,7 +16,7 @@ df <- data.table(Arthritis, keep.rownames = FALSE)
df[, AgeDiscret := as.factor(round(Age / 10, 0))]
df[, AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
df[, ID := NULL]
sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df) # nolint
label <- df[, ifelse(Improved == "Marked", 1, 0)]
# binary
@@ -46,7 +46,7 @@ mbst.GLM <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0,
test_that("xgb.dump works", {
if (!flag_32bit)
expect_length(xgb.dump(bst.Tree), 200)
dump_file = file.path(tempdir(), 'xgb.model.dump')
dump_file <- file.path(tempdir(), 'xgb.model.dump')
expect_true(xgb.dump(bst.Tree, dump_file, with_stats = TRUE))
expect_true(file.exists(dump_file))
expect_gt(file.size(dump_file), 8000)
@@ -170,7 +170,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
pred <- pr()
shap <- pr(predcontrib = TRUE)
shapi <- pr(predinteraction = TRUE)
tol = 1e-5
tol <- 1e-5
expect_equal(rowSums(shap), pred, tol = tol)
expect_equal(apply(shapi, 1, sum), pred, tol = tol)

@@ -26,7 +26,7 @@ test_that("predict feature interactions works", {
param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
b <- xgb.train(param, dm, 100)
pred = predict(b, dm, outputmargin=TRUE)
pred <- predict(b, dm, outputmargin = TRUE)
# SHAP contributions:
cont <- predict(b, dm, predcontrib = TRUE)
@@ -73,9 +73,9 @@ test_that("predict feature interactions works", {
gt_intr[, 2, 3] <- 1. * X[, 2] * X[, 3] # attribute a HALF of the interaction term to each symmetric element
gt_intr[, 3, 2] <- gt_intr[, 2, 3]
# merge-in the diagonal based on 'ground truth' feature contributions
intr_diag = gt_cont - apply(gt_intr, c(1,2), sum)
intr_diag <- gt_cont - apply(gt_intr, c(1, 2), sum)
for (j in seq_len(P)) {
gt_intr[,j,j] = intr_diag[,j]
gt_intr[, j, j] <- intr_diag[, j]
}
# These should be relatively close:
expect_lt(max(abs(intr - gt_intr)), 0.1)
@@ -119,7 +119,7 @@ test_that("multiclass feature interactions work", {
dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
b <- xgb.train(param, dm, 40)
pred = predict(b, dm, outputmargin=TRUE) %>% array(c(3, 150)) %>% t
pred <- predict(b, dm, outputmargin = TRUE) %>% array(c(3, 150)) %>% t
# SHAP contributions:
cont <- predict(b, dm, predcontrib = TRUE)

@@ -3,22 +3,21 @@ require(xgboost)
context("monotone constraints")
set.seed(1024)
x = rnorm(1000, 10)
y = -1*x + rnorm(1000, 0.001) + 3*sin(x)
train = matrix(x, ncol = 1)
x <- rnorm(1000, 10)
y <- -1 * x + rnorm(1000, 0.001) + 3 * sin(x)
train <- matrix(x, ncol = 1)
test_that("monotone constraints for regression", {
bst = xgboost(data = train, label = y, max_depth = 2,
bst <- xgboost(data = train, label = y, max_depth = 2,
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
monotone_constraints = -1)
pred = predict(bst, train)
pred <- predict(bst, train)
ind = order(train[,1])
pred.ord = pred[ind]
ind <- order(train[, 1])
pred.ord <- pred[ind]
expect_true({
!any(diff(pred.ord) > 0)
}, "Monotone Contraint Satisfied")
})

@@ -9,10 +9,10 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# Disable flaky tests for 32-bit Windows.
# See https://github.com/dmlc/xgboost/issues/3720
win32_flag = .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
test_that("updating the model works", {
watchlist = list(train = dtrain, test = dtest)
watchlist <- list(train = dtrain, test = dtest)
# no-subsampling
p1 <- list(objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2)