Add Github Action for R. (#5911)

* Fix lintr errors.
This commit is contained in:
Jiaming Yuan
2020-07-20 19:23:36 +08:00
committed by GitHub
parent b3d2e7644a
commit 8b1afce316
33 changed files with 589 additions and 544 deletions

View File

@@ -2,19 +2,19 @@ require(xgboost)
context("basic functions")
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
train <- agaricus.train
test <- agaricus.test
set.seed(1994)
# disable some tests for Win32
windows_flag = .Platform$OS.type == "windows" &&
windows_flag <- .Platform$OS.type == "windows" &&
.Machine$sizeof.pointer != 8
solaris_flag = (Sys.info()['sysname'] == "SunOS")
solaris_flag <- (Sys.info()['sysname'] == "SunOS")
test_that("train and predict binary classification", {
nrounds = 2
nrounds <- 2
expect_output(
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic")
@@ -30,24 +30,24 @@ test_that("train and predict binary classification", {
pred1 <- predict(bst, train$data, ntreelimit = 1)
expect_length(pred1, 6513)
err_pred1 <- sum((pred1 > 0.5) != train$label)/length(train$label)
err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)
err_log <- bst$evaluation_log[1, train_error]
expect_lt(abs(err_pred1 - err_log), 10e-6)
})
test_that("parameter validation works", {
p <- list(foo = "bar")
nrounds = 1
nrounds <- 1
set.seed(1994)
d <- cbind(
x1 = rnorm(10),
x2 = rnorm(10),
x3 = rnorm(10))
y <- d[,"x1"] + d[,"x2"]^2 +
ifelse(d[,"x3"] > .5, d[,"x3"]^2, 2^d[,"x3"]) +
y <- d[, "x1"] + d[, "x2"]^2 +
ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
rnorm(10)
dtrain <- xgb.DMatrix(data=d, info = list(label=y))
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
correct <- function() {
params <- list(max_depth = 2, booster = "dart",
@@ -70,15 +70,15 @@ test_that("parameter validation works", {
test_that("dart prediction works", {
nrounds = 32
nrounds <- 32
set.seed(1994)
d <- cbind(
x1 = rnorm(100),
x2 = rnorm(100),
x3 = rnorm(100))
y <- d[,"x1"] + d[,"x2"]^2 +
ifelse(d[,"x3"] > .5, d[,"x3"]^2, 2^d[,"x3"]) +
y <- d[, "x1"] + d[, "x2"]^2 +
ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
rnorm(100)
set.seed(1994)
@@ -87,23 +87,23 @@ test_that("dart prediction works", {
eta = 1, nthread = 2, nrounds = nrounds, objective = "reg:squarederror")
pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, ntreelimit = 0)
pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, ntreelimit = nrounds)
expect_true(all(matrix(pred_by_xgboost_0, byrow=TRUE) == matrix(pred_by_xgboost_1, byrow=TRUE)))
expect_true(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_1, byrow = TRUE)))
pred_by_xgboost_2 <- predict(booster_by_xgboost, newdata = d, training = TRUE)
expect_false(all(matrix(pred_by_xgboost_0, byrow=TRUE) == matrix(pred_by_xgboost_2, byrow=TRUE)))
expect_false(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))
set.seed(1994)
dtrain <- xgb.DMatrix(data=d, info = list(label=y))
booster_by_train <- xgb.train( params = list(
booster = "dart",
max_depth = 2,
eta = 1,
rate_drop = 0.5,
one_drop = TRUE,
nthread = 1,
tree_method= "exact",
objective = "reg:squarederror"
),
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
booster_by_train <- xgb.train(params = list(
booster = "dart",
max_depth = 2,
eta = 1,
rate_drop = 0.5,
one_drop = TRUE,
nthread = 1,
tree_method = "exact",
objective = "reg:squarederror"
),
data = dtrain,
nrounds = nrounds
)
@@ -111,9 +111,9 @@ test_that("dart prediction works", {
pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, ntreelimit = nrounds)
pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)
expect_true(all(matrix(pred_by_train_0, byrow=TRUE) == matrix(pred_by_xgboost_0, byrow=TRUE)))
expect_true(all(matrix(pred_by_train_1, byrow=TRUE) == matrix(pred_by_xgboost_1, byrow=TRUE)))
expect_true(all(matrix(pred_by_train_2, byrow=TRUE) == matrix(pred_by_xgboost_2, byrow=TRUE)))
expect_true(all(matrix(pred_by_train_0, byrow = TRUE) == matrix(pred_by_xgboost_0, byrow = TRUE)))
expect_true(all(matrix(pred_by_train_1, byrow = TRUE) == matrix(pred_by_xgboost_1, byrow = TRUE)))
expect_true(all(matrix(pred_by_train_2, byrow = TRUE) == matrix(pred_by_xgboost_2, byrow = TRUE)))
})
test_that("train and predict softprob", {
@@ -122,7 +122,7 @@ test_that("train and predict softprob", {
expect_output(
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
objective = "multi:softprob", num_class=3)
objective = "multi:softprob", num_class = 3)
, "train-merror")
expect_false(is.null(bst$evaluation_log))
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
@@ -130,17 +130,17 @@ test_that("train and predict softprob", {
pred <- predict(bst, as.matrix(iris[, -5]))
expect_length(pred, nrow(iris) * 3)
# row sums add up to total probability of 1:
expect_equal(rowSums(matrix(pred, ncol=3, byrow=TRUE)), rep(1, nrow(iris)), tolerance = 1e-7)
expect_equal(rowSums(matrix(pred, ncol = 3, byrow = TRUE)), rep(1, nrow(iris)), tolerance = 1e-7)
# manually calculate error at the last iteration:
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
expect_equal(as.numeric(t(mpred)), pred)
pred_labels <- max.col(mpred) - 1
err <- sum(pred_labels != lb)/length(lb)
err <- sum(pred_labels != lb) / length(lb)
expect_equal(bst$evaluation_log[5, train_merror], err, tolerance = 5e-6)
# manually calculate error at the 1st iteration:
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 1)
pred_labels <- max.col(mpred) - 1
err <- sum(pred_labels != lb)/length(lb)
err <- sum(pred_labels != lb) / length(lb)
expect_equal(bst$evaluation_log[1, train_merror], err, tolerance = 5e-6)
})
@@ -150,7 +150,7 @@ test_that("train and predict softmax", {
expect_output(
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
objective = "multi:softmax", num_class=3)
objective = "multi:softmax", num_class = 3)
, "train-merror")
expect_false(is.null(bst$evaluation_log))
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
@@ -158,7 +158,7 @@ test_that("train and predict softmax", {
pred <- predict(bst, as.matrix(iris[, -5]))
expect_length(pred, nrow(iris))
err <- sum(pred != lb)/length(lb)
err <- sum(pred != lb) / length(lb)
expect_equal(bst$evaluation_log[5, train_merror], err, tolerance = 5e-6)
})
@@ -173,12 +173,12 @@ test_that("train and predict RF", {
expect_equal(xgb.ntree(bst), 20)
pred <- predict(bst, train$data)
pred_err <- sum((pred > 0.5) != lb)/length(lb)
pred_err <- sum((pred > 0.5) != lb) / length(lb)
expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
#expect_lt(pred_err, 0.03)
pred <- predict(bst, train$data, ntreelimit = 20)
pred_err_20 <- sum((pred > 0.5) != lb)/length(lb)
pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
expect_equal(pred_err_20, pred_err)
#pred <- predict(bst, train$data, ntreelimit = 1)
@@ -193,19 +193,19 @@ test_that("train and predict RF with softprob", {
set.seed(11)
bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds,
objective = "multi:softprob", num_class=3, verbose = 0,
objective = "multi:softprob", num_class = 3, verbose = 0,
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5)
expect_equal(bst$niter, 15)
expect_equal(xgb.ntree(bst), 15*3*4)
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
# predict for all iterations:
pred <- predict(bst, as.matrix(iris[, -5]), reshape=TRUE)
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
expect_equal(dim(pred), c(nrow(iris), 3))
pred_labels <- max.col(pred) - 1
err <- sum(pred_labels != lb)/length(lb)
err <- sum(pred_labels != lb) / length(lb)
expect_equal(bst$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
# predict for 7 iterations and adjust for 4 parallel trees per iteration
pred <- predict(bst, as.matrix(iris[, -5]), reshape=TRUE, ntreelimit = 7 * 4)
err <- sum((max.col(pred) - 1) != lb)/length(lb)
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 7 * 4)
err <- sum((max.col(pred) - 1) != lb) / length(lb)
expect_equal(bst$evaluation_log[7, train_merror], err, tolerance = 5e-6)
})
@@ -223,7 +223,7 @@ test_that("use of multiple eval metrics works", {
test_that("training continuation works", {
dtrain <- xgb.DMatrix(train$data, label = train$label)
watchlist = list(train=dtrain)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2)
# for the reference, use 4 iterations at once:
@@ -255,7 +255,7 @@ test_that("training continuation works", {
test_that("model serialization works", {
out_path <- "model_serialization"
dtrain <- xgb.DMatrix(train$data, label = train$label)
watchlist = list(train=dtrain)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic")
booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
raw <- xgb.serialize(booster)
@@ -273,7 +273,7 @@ test_that("xgb.cv works", {
expect_output(
cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5,
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
verbose=TRUE)
verbose = TRUE)
, "train-error:")
expect_is(cv, 'xgb.cv.synchronous')
expect_false(is.null(cv$evaluation_log))
@@ -292,11 +292,11 @@ test_that("xgb.cv works with stratified folds", {
set.seed(314159)
cv <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
verbose=TRUE, stratified = FALSE)
verbose = TRUE, stratified = FALSE)
set.seed(314159)
cv2 <- xgb.cv(data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic",
verbose=TRUE, stratified = TRUE)
verbose = TRUE, stratified = TRUE)
# Stratified folds should result in a different evaluation logs
expect_true(all(cv$evaluation_log[, test_error_mean] != cv2$evaluation_log[, test_error_mean]))
})
@@ -319,7 +319,7 @@ test_that("train and predict with non-strict classes", {
expect_equal(pr0, pr)
# dense matrix-like input of non-matrix class with some inheritance
class(train_dense) <- c('pphmatrix','shmatrix')
class(train_dense) <- c('pphmatrix', 'shmatrix')
expect_true(is.matrix(train_dense))
expect_error(
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
@@ -337,15 +337,15 @@ test_that("train and predict with non-strict classes", {
test_that("max_delta_step works", {
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic", eval_metric="logloss", max_depth = 2, nthread = 2, eta = 0.5)
nrounds = 5
param <- list(objective = "binary:logistic", eval_metric = "logloss", max_depth = 2, nthread = 2, eta = 0.5)
nrounds <- 5
# model with no restriction on max_delta_step
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
# model with restricted max_delta_step
bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1)
# the no-restriction model is expected to have consistently lower loss during the initial interations
expect_true(all(bst1$evaluation_log$train_logloss < bst2$evaluation_log$train_logloss))
expect_lt(mean(bst1$evaluation_log$train_logloss)/mean(bst2$evaluation_log$train_logloss), 0.8)
expect_lt(mean(bst1$evaluation_log$train_logloss) / mean(bst2$evaluation_log$train_logloss), 0.8)
})
test_that("colsample_bytree works", {

View File

@@ -5,8 +5,8 @@ require(data.table)
context("callbacks")
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
train <- agaricus.train
test <- agaricus.test
@@ -21,24 +21,24 @@ ltrain <- add.noise(train$label, 0.2)
ltest <- add.noise(test$label, 0.2)
dtrain <- xgb.DMatrix(train$data, label = ltrain)
dtest <- xgb.DMatrix(test$data, label = ltest)
watchlist = list(train=dtrain, test=dtest)
watchlist <- list(train = dtrain, test = dtest)
err <- function(label, pr) sum((pr > 0.5) != label)/length(label)
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
param <- list(objective = "binary:logistic", max_depth = 2, nthread = 2)
test_that("cb.print.evaluation works as expected", {
bst_evaluation <- c('train-auc'=0.9, 'test-auc'=0.8)
bst_evaluation <- c('train-auc' = 0.9, 'test-auc' = 0.8)
bst_evaluation_err <- NULL
begin_iteration <- 1
end_iteration <- 7
f0 <- cb.print.evaluation(period=0)
f1 <- cb.print.evaluation(period=1)
f5 <- cb.print.evaluation(period=5)
f0 <- cb.print.evaluation(period = 0)
f1 <- cb.print.evaluation(period = 1)
f5 <- cb.print.evaluation(period = 5)
expect_false(is.null(attr(f1, 'call')))
expect_equal(attr(f1, 'name'), 'cb.print.evaluation')
@@ -57,13 +57,13 @@ test_that("cb.print.evaluation works as expected", {
expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
expect_output(f5(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
bst_evaluation_err <- c('train-auc'=0.1, 'test-auc'=0.2)
bst_evaluation_err <- c('train-auc' = 0.1, 'test-auc' = 0.2)
expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\\+0.100000\ttest-auc:0.800000\\+0.200000")
})
test_that("cb.evaluation.log works as expected", {
bst_evaluation <- c('train-auc'=0.9, 'test-auc'=0.8)
bst_evaluation <- c('train-auc' = 0.9, 'test-auc' = 0.8)
bst_evaluation_err <- NULL
evaluation_log <- list()
@@ -75,33 +75,33 @@ test_that("cb.evaluation.log works as expected", {
iteration <- 1
expect_silent(f())
expect_equal(evaluation_log,
list(c(iter=1, bst_evaluation)))
list(c(iter = 1, bst_evaluation)))
iteration <- 2
expect_silent(f())
expect_equal(evaluation_log,
list(c(iter=1, bst_evaluation), c(iter=2, bst_evaluation)))
list(c(iter = 1, bst_evaluation), c(iter = 2, bst_evaluation)))
expect_silent(f(finalize = TRUE))
expect_equal(evaluation_log,
data.table(iter=1:2, train_auc=c(0.9,0.9), test_auc=c(0.8,0.8)))
data.table(iter = 1:2, train_auc = c(0.9, 0.9), test_auc = c(0.8, 0.8)))
bst_evaluation_err <- c('train-auc'=0.1, 'test-auc'=0.2)
bst_evaluation_err <- c('train-auc' = 0.1, 'test-auc' = 0.2)
evaluation_log <- list()
f <- cb.evaluation.log()
iteration <- 1
expect_silent(f())
expect_equal(evaluation_log,
list(c(iter=1, c(bst_evaluation, bst_evaluation_err))))
list(c(iter = 1, c(bst_evaluation, bst_evaluation_err))))
iteration <- 2
expect_silent(f())
expect_equal(evaluation_log,
list(c(iter=1, c(bst_evaluation, bst_evaluation_err)),
c(iter=2, c(bst_evaluation, bst_evaluation_err))))
list(c(iter = 1, c(bst_evaluation, bst_evaluation_err)),
c(iter = 2, c(bst_evaluation, bst_evaluation_err))))
expect_silent(f(finalize = TRUE))
expect_equal(evaluation_log,
data.table(iter=1:2,
train_auc_mean=c(0.9,0.9), train_auc_std=c(0.1,0.1),
test_auc_mean=c(0.8,0.8), test_auc_std=c(0.2,0.2)))
data.table(iter = 1:2,
train_auc_mean = c(0.9, 0.9), train_auc_std = c(0.1, 0.1),
test_auc_mean = c(0.8, 0.8), test_auc_std = c(0.2, 0.2)))
})
@@ -237,7 +237,7 @@ test_that("early stopping using a specific metric works", {
set.seed(11)
expect_output(
bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.6,
eval_metric="logloss", eval_metric="auc",
eval_metric = "logloss", eval_metric = "auc",
callbacks = list(cb.early.stop(stopping_rounds = 3, maximize = FALSE,
metric_name = 'test_logloss')))
, "Stopping. Best iteration")
@@ -267,12 +267,12 @@ test_that("early stopping xgb.cv works", {
test_that("prediction in xgb.cv works", {
set.seed(11)
nrounds = 4
nrounds <- 4
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0)
expect_false(is.null(cv$evaluation_log))
expect_false(is.null(cv$pred))
expect_length(cv$pred, nrow(train$data))
err_pred <- mean( sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))) )
err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))))
err_log <- cv$evaluation_log[nrounds, test_error_mean]
expect_equal(err_pred, err_log, tolerance = 1e-6)
@@ -308,7 +308,7 @@ test_that("prediction in early-stopping xgb.cv works", {
expect_false(is.null(cv$pred))
expect_length(cv$pred, nrow(train$data))
err_pred <- mean( sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))) )
err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))))
err_log <- cv$evaluation_log[cv$best_iteration, test_error_mean]
expect_equal(err_pred, err_log, tolerance = 1e-6)
err_log_last <- cv$evaluation_log[cv$niter, test_error_mean]

View File

@@ -4,8 +4,8 @@ require(xgboost)
set.seed(1994)
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
watchlist <- list(eval = dtest, train = dtrain)
@@ -24,8 +24,8 @@ evalerror <- function(preds, dtrain) {
return(list(metric = "error", value = err))
}
param <- list(max_depth=2, eta=1, nthread = 2,
objective=logregobj, eval_metric=evalerror)
param <- list(max_depth = 2, eta = 1, nthread = 2,
objective = logregobj, eval_metric = evalerror)
num_round <- 2
test_that("custom objective works", {
@@ -37,7 +37,7 @@ test_that("custom objective works", {
})
test_that("custom objective in CV works", {
cv <- xgb.cv(param, dtrain, num_round, nfold=10, verbose=FALSE)
cv <- xgb.cv(param, dtrain, num_round, nfold = 10, verbose = FALSE)
expect_false(is.null(cv$evaluation_log))
expect_equal(dim(cv$evaluation_log), c(2, 5))
expect_lt(cv$evaluation_log[num_round, test_error_mean], 0.03)
@@ -54,14 +54,14 @@ test_that("custom objective using DMatrix attr works", {
hess <- preds * (1 - preds)
return(list(grad = grad, hess = hess))
}
param$objective = logregobjattr
param$objective <- logregobjattr
bst <- xgb.train(param, dtrain, num_round, watchlist)
expect_equal(class(bst), "xgb.Booster")
})
test_that("custom objective with multi-class works", {
data = as.matrix(iris[, -5])
label = as.numeric(iris$Species) - 1
data <- as.matrix(iris[, -5])
label <- as.numeric(iris$Species) - 1
dtrain <- xgb.DMatrix(data = data, label = label)
nclasses <- 3
@@ -72,6 +72,6 @@ test_that("custom objective with multi-class works", {
hess <- rnorm(dim(as.matrix(preds))[1])
return (list(grad = grad, hess = hess))
}
param$objective = fake_softprob
bst <- xgb.train(param, dtrain, 1, num_class=nclasses)
param$objective <- fake_softprob
bst <- xgb.train(param, dtrain, 1, num_class = nclasses)
})

View File

@@ -3,29 +3,29 @@ require(Matrix)
context("testing xgb.DMatrix functionality")
data(agaricus.test, package='xgboost')
test_data <- agaricus.test$data[1:100,]
data(agaricus.test, package = 'xgboost')
test_data <- agaricus.test$data[1:100, ]
test_label <- agaricus.test$label[1:100]
test_that("xgb.DMatrix: basic construction", {
# from sparse matrix
dtest1 <- xgb.DMatrix(test_data, label=test_label)
dtest1 <- xgb.DMatrix(test_data, label = test_label)
# from dense matrix
dtest2 <- xgb.DMatrix(as.matrix(test_data), label=test_label)
dtest2 <- xgb.DMatrix(as.matrix(test_data), label = test_label)
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label'))
expect_equal(dim(dtest1), dim(dtest2))
#from dense integer matrix
int_data <- as.matrix(test_data)
storage.mode(int_data) <- "integer"
dtest3 <- xgb.DMatrix(int_data, label=test_label)
dtest3 <- xgb.DMatrix(int_data, label = test_label)
expect_equal(dim(dtest1), dim(dtest3))
})
test_that("xgb.DMatrix: saving, loading", {
# save to a local file
dtest1 <- xgb.DMatrix(test_data, label=test_label)
dtest1 <- xgb.DMatrix(test_data, label = test_label)
tmp_file <- tempfile('xgb.DMatrix_')
expect_true(xgb.DMatrix.save(dtest1, tmp_file))
# read from a local file
@@ -35,12 +35,12 @@ test_that("xgb.DMatrix: saving, loading", {
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label'))
# from a libsvm text file
tmp <- c("0 1:1 2:1","1 3:1","0 1:1")
tmp <- c("0 1:1 2:1", "1 3:1", "0 1:1")
tmp_file <- 'tmp.libsvm'
writeLines(tmp, tmp_file)
dtest4 <- xgb.DMatrix(tmp_file, silent = TRUE)
expect_equal(dim(dtest4), c(3, 4))
expect_equal(getinfo(dtest4, 'label'), c(0,1,0))
expect_equal(getinfo(dtest4, 'label'), c(0, 1, 0))
unlink(tmp_file)
})
@@ -61,7 +61,7 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
expect_true(setinfo(dtest, 'weight', test_label))
expect_true(setinfo(dtest, 'base_margin', test_label))
expect_true(setinfo(dtest, 'group', c(50,50)))
expect_true(setinfo(dtest, 'group', c(50, 50)))
expect_error(setinfo(dtest, 'group', test_label))
# providing character values will give a warning
@@ -72,35 +72,35 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
})
test_that("xgb.DMatrix: slice, dim", {
dtest <- xgb.DMatrix(test_data, label=test_label)
dtest <- xgb.DMatrix(test_data, label = test_label)
expect_equal(dim(dtest), dim(test_data))
dsub1 <- slice(dtest, 1:42)
expect_equal(nrow(dsub1), 42)
expect_equal(ncol(dsub1), ncol(test_data))
dsub2 <- dtest[1:42,]
dsub2 <- dtest[1:42, ]
expect_equal(dim(dtest), dim(test_data))
expect_equal(getinfo(dsub1, 'label'), getinfo(dsub2, 'label'))
})
test_that("xgb.DMatrix: slice, trailing empty rows", {
data(agaricus.train, package='xgboost')
data(agaricus.train, package = 'xgboost')
train_data <- agaricus.train$data
train_label <- agaricus.train$label
dtrain <- xgb.DMatrix(data=train_data, label=train_label)
dtrain <- xgb.DMatrix(data = train_data, label = train_label)
slice(dtrain, 6513L)
train_data[6513, ] <- 0
dtrain <- xgb.DMatrix(data=train_data, label=train_label)
dtrain <- xgb.DMatrix(data = train_data, label = train_label)
slice(dtrain, 6513L)
expect_equal(nrow(dtrain), 6513)
})
test_that("xgb.DMatrix: colnames", {
dtest <- xgb.DMatrix(test_data, label=test_label)
dtest <- xgb.DMatrix(test_data, label = test_label)
expect_equal(colnames(dtest), colnames(test_data))
expect_error( colnames(dtest) <- 'asdf')
expect_error(colnames(dtest) <- 'asdf')
new_names <- make.names(1:ncol(test_data))
expect_silent( colnames(dtest) <- new_names)
expect_silent(colnames(dtest) <- new_names)
expect_equal(colnames(dtest), new_names)
expect_silent(colnames(dtest) <- NULL)
expect_null(colnames(dtest))
@@ -109,7 +109,7 @@ test_that("xgb.DMatrix: colnames", {
test_that("xgb.DMatrix: nrow is correct for a very sparse matrix", {
set.seed(123)
nr <- 1000
x <- rsparsematrix(nr, 100, density=0.0005)
x <- rsparsematrix(nr, 100, density = 0.0005)
# we want it very sparse, so that last rows are empty
expect_lt(max(x@i), nr)
dtest <- xgb.DMatrix(x)

View File

@@ -3,8 +3,8 @@ require(xgboost)
context("Garbage Collection Safety Check")
test_that("train and prediction when gctorture is on", {
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
train <- agaricus.train
test <- agaricus.test
gctorture(TRUE)

View File

@@ -3,8 +3,8 @@ context('Test generalized linear models')
require(xgboost)
test_that("gblinear works", {
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
@@ -16,7 +16,7 @@ test_that("gblinear works", {
ERR_UL <- 0.005 # upper limit for the test set error
VERB <- 0 # chatterbox switch
param$updater = 'shotgun'
param$updater <- 'shotgun'
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
ypred <- predict(bst, dtest)
expect_equal(length(getinfo(dtest, 'label')), 1611)
@@ -29,7 +29,7 @@ test_that("gblinear works", {
expect_equal(dim(h), c(n, ncol(dtrain) + 1))
expect_is(h, "matrix")
param$updater = 'coord_descent'
param$updater <- 'coord_descent'
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)

View File

@@ -5,18 +5,18 @@ require(data.table)
require(Matrix)
require(vcd, quietly = TRUE)
float_tolerance = 5e-6
float_tolerance <- 5e-6
# disable some tests for 32-bit environment
flag_32bit = .Machine$sizeof.pointer != 8
flag_32bit <- .Machine$sizeof.pointer != 8
set.seed(1982)
data(Arthritis)
df <- data.table(Arthritis, keep.rownames = FALSE)
df[,AgeDiscret := as.factor(round(Age / 10,0))]
df[,AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
df[,ID := NULL]
sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
df[, AgeDiscret := as.factor(round(Age / 10, 0))]
df[, AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
df[, ID := NULL]
sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df) # nolint
label <- df[, ifelse(Improved == "Marked", 1, 0)]
# binary
@@ -46,7 +46,7 @@ mbst.GLM <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0,
test_that("xgb.dump works", {
if (!flag_32bit)
expect_length(xgb.dump(bst.Tree), 200)
dump_file = file.path(tempdir(), 'xgb.model.dump')
dump_file <- file.path(tempdir(), 'xgb.model.dump')
expect_true(xgb.dump(bst.Tree, dump_file, with_stats = TRUE))
expect_true(file.exists(dump_file))
expect_gt(file.size(dump_file), 8000)
@@ -63,7 +63,7 @@ test_that("xgb.dump works for gblinear", {
# also make sure that it works properly for a sparse model where some coefficients
# are 0 from setting large L1 regularization:
bst.GLM.sp <- xgboost(data = sparse_matrix, label = label, eta = 1, nthread = 2, nrounds = 1,
alpha=2, objective = "binary:logistic", booster = "gblinear")
alpha = 2, objective = "binary:logistic", booster = "gblinear")
d.sp <- xgb.dump(bst.GLM.sp)
expect_length(d.sp, 14)
expect_gt(sum(d.sp == "0"), 0)
@@ -110,9 +110,9 @@ test_that("predict feature contributions works", {
pred <- predict(bst.GLM, sparse_matrix, outputmargin = TRUE)
expect_lt(max(abs(rowSums(pred_contr) - pred)), 1e-5)
# manual calculation of linear terms
coefs <- xgb.dump(bst.GLM)[-c(1,2,4)] %>% as.numeric
coefs <- xgb.dump(bst.GLM)[-c(1, 2, 4)] %>% as.numeric
coefs <- c(coefs[-1], coefs[1]) # intercept must be the last
pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN="*")
pred_contr_manual <- sweep(cbind(sparse_matrix, 1), 2, coefs, FUN = "*")
expect_equal(as.numeric(pred_contr), as.numeric(pred_contr_manual),
tolerance = float_tolerance)
@@ -130,13 +130,13 @@ test_that("predict feature contributions works", {
pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
pred_contr <- predict(mbst.GLM, as.matrix(iris[, -5]), predcontrib = TRUE)
expect_length(pred_contr, 3)
coefs_all <- xgb.dump(mbst.GLM)[-c(1,2,6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
coefs_all <- xgb.dump(mbst.GLM)[-c(1, 2, 6)] %>% as.numeric %>% matrix(ncol = 3, byrow = TRUE)
for (g in seq_along(pred_contr)) {
expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "BIAS"))
expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
# manual calculation of linear terms
coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last
pred_contr_manual <- sweep(as.matrix(cbind(iris[,-5], 1)), 2, coefs, FUN="*")
pred_contr_manual <- sweep(as.matrix(cbind(iris[, -5], 1)), 2, coefs, FUN = "*")
expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual),
tolerance = float_tolerance)
}
@@ -147,8 +147,8 @@ test_that("SHAPs sum to predictions, with or without DART", {
x1 = rnorm(100),
x2 = rnorm(100),
x3 = rnorm(100))
y <- d[,"x1"] + d[,"x2"]^2 +
ifelse(d[,"x3"] > .5, d[,"x3"]^2, 2^d[,"x3"]) +
y <- d[, "x1"] + d[, "x2"]^2 +
ifelse(d[, "x3"] > .5, d[, "x3"]^2, 2^d[, "x3"]) +
rnorm(100)
nrounds <- 30
@@ -170,19 +170,19 @@ test_that("SHAPs sum to predictions, with or without DART", {
pred <- pr()
shap <- pr(predcontrib = TRUE)
shapi <- pr(predinteraction = TRUE)
tol = 1e-5
tol <- 1e-5
expect_equal(rowSums(shap), pred, tol = tol)
expect_equal(apply(shapi, 1, sum), pred, tol = tol)
for (i in 1 : nrow(d))
for (f in list(rowSums, colSums))
expect_equal(f(shapi[i,,]), shap[i,], tol = tol)
expect_equal(f(shapi[i, , ]), shap[i, ], tol = tol)
}
})
test_that("xgb-attribute functionality", {
val <- "my attribute value"
list.val <- list(my_attr=val, a=123, b='ok')
list.val <- list(my_attr = val, a = 123, b = 'ok')
list.ch <- list.val[order(names(list.val))]
list.ch <- lapply(list.ch, as.character)
# note: iter is 0-index in xgb attributes
@@ -208,9 +208,9 @@ test_that("xgb-attribute functionality", {
xgb.attr(bst, "my_attr") <- NULL
expect_null(xgb.attr(bst, "my_attr"))
expect_equal(xgb.attributes(bst), list.ch[c("a", "b", "niter")])
xgb.attributes(bst) <- list(a=NULL, b=NULL)
xgb.attributes(bst) <- list(a = NULL, b = NULL)
expect_equal(xgb.attributes(bst), list.default)
xgb.attributes(bst) <- list(niter=NULL)
xgb.attributes(bst) <- list(niter = NULL)
expect_null(xgb.attributes(bst))
})
@@ -268,7 +268,7 @@ test_that("xgb.model.dt.tree works with and without feature names", {
bst.Tree.x$feature_names <- NULL
dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.x)
expect_output(str(dt.tree.x), 'Feature.*\\"3\\"')
expect_equal(dt.tree[, -4, with=FALSE], dt.tree.x[, -4, with=FALSE])
expect_equal(dt.tree[, -4, with = FALSE], dt.tree.x[, -4, with = FALSE])
# using integer node ID instead of character
dt.tree.int <- xgb.model.dt.tree(model = bst.Tree, use_int_id = TRUE)
@@ -295,7 +295,7 @@ test_that("xgb.importance works with and without feature names", {
bst.Tree.x <- bst.Tree
bst.Tree.x$feature_names <- NULL
importance.Tree.x <- xgb.importance(model = bst.Tree)
expect_equal(importance.Tree[, -1, with=FALSE], importance.Tree.x[, -1, with=FALSE],
expect_equal(importance.Tree[, -1, with = FALSE], importance.Tree.x[, -1, with = FALSE],
tolerance = float_tolerance)
imp2plot <- xgb.plot.importance(importance_matrix = importance.Tree)
@@ -305,7 +305,7 @@ test_that("xgb.importance works with and without feature names", {
# for multiclass
imp.Tree <- xgb.importance(model = mbst.Tree)
expect_equal(dim(imp.Tree), c(4, 4))
xgb.importance(model = mbst.Tree, trees = seq(from=0, by=nclass, length.out=nrounds))
xgb.importance(model = mbst.Tree, trees = seq(from = 0, by = nclass, length.out = nrounds))
})
test_that("xgb.importance works with GLM model", {
@@ -320,7 +320,7 @@ test_that("xgb.importance works with GLM model", {
# for multiclass
imp.GLM <- xgb.importance(model = mbst.GLM)
expect_equal(dim(imp.GLM), c(12, 3))
expect_equal(imp.GLM$Class, rep(0:2, each=4))
expect_equal(imp.GLM$Class, rep(0:2, each = 4))
})
test_that("xgb.model.dt.tree and xgb.importance work with a single split model", {

View File

@@ -5,20 +5,20 @@ context("interaction constraints")
set.seed(1024)
x1 <- rnorm(1000, 1)
x2 <- rnorm(1000, 1)
x3 <- sample(c(1,2,3), size=1000, replace=TRUE)
y <- x1 + x2 + x3 + x1*x2*x3 + rnorm(1000, 0.001) + 3*sin(x1)
train <- matrix(c(x1,x2,x3), ncol = 3)
x3 <- sample(c(1, 2, 3), size = 1000, replace = TRUE)
y <- x1 + x2 + x3 + x1 * x2 * x3 + rnorm(1000, 0.001) + 3 * sin(x1)
train <- matrix(c(x1, x2, x3), ncol = 3)
test_that("interaction constraints for regression", {
# Fit a model that only allows interaction between x1 and x2
bst <- xgboost(data = train, label = y, max_depth = 3,
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
interaction_constraints = list(c(0,1)))
interaction_constraints = list(c(0, 1)))
# Set all observations to have the same x3 values then increment
# by the same amount
preds <- lapply(c(1,2,3), function(x){
tmat <- matrix(c(x1,x2,rep(x,1000)), ncol=3)
preds <- lapply(c(1, 2, 3), function(x){
tmat <- matrix(c(x1, x2, rep(x, 1000)), ncol = 3)
return(predict(bst, tmat))
})
@@ -40,16 +40,16 @@ test_that("interaction constraints scientific representation", {
rows <- 10
## When number exceeds 1e5, R paste function uses scientific representation.
## See: https://github.com/dmlc/xgboost/issues/5179
cols <- 1e5+10
cols <- 1e5 + 10
d <- matrix(rexp(rows, rate=.1), nrow=rows, ncol=cols)
d <- matrix(rexp(rows, rate = .1), nrow = rows, ncol = cols)
y <- rnorm(rows)
dtrain <- xgb.DMatrix(data=d, info = list(label=y))
dtrain <- xgb.DMatrix(data = d, info = list(label = y))
inc <- list(c(seq.int(from = 0, to = cols, by = 1)))
with_inc <- xgb.train(data=dtrain, tree_method='hist',
interaction_constraints=inc, nrounds=10)
without_inc <- xgb.train(data=dtrain, tree_method='hist', nrounds=10)
with_inc <- xgb.train(data = dtrain, tree_method = 'hist',
interaction_constraints = inc, nrounds = 10)
without_inc <- xgb.train(data = dtrain, tree_method = 'hist', nrounds = 10)
expect_equal(xgb.save.raw(with_inc), xgb.save.raw(without_inc))
})

View File

@@ -9,9 +9,9 @@ test_that("predict feature interactions works", {
# simulate some binary data and a linear outcome with an interaction term
N <- 1000
P <- 5
X <- matrix(rbinom(N * P, 1, 0.5), ncol=P, dimnames = list(NULL, letters[1:P]))
X <- matrix(rbinom(N * P, 1, 0.5), ncol = P, dimnames = list(NULL, letters[1:P]))
# center the data (as contributions are computed WRT feature means)
X <- scale(X, scale=FALSE)
X <- scale(X, scale = FALSE)
# outcome without any interactions, without any noise:
f <- function(x) 2 * x[, 1] - 3 * x[, 2]
@@ -23,14 +23,14 @@ test_that("predict feature interactions works", {
y <- f_int(X)
dm <- xgb.DMatrix(X, label = y)
param <- list(eta=0.1, max_depth=4, base_score=mean(y), lambda=0, nthread=2)
param <- list(eta = 0.1, max_depth = 4, base_score = mean(y), lambda = 0, nthread = 2)
b <- xgb.train(param, dm, 100)
pred = predict(b, dm, outputmargin=TRUE)
pred <- predict(b, dm, outputmargin = TRUE)
# SHAP contributions:
cont <- predict(b, dm, predcontrib=TRUE)
expect_equal(dim(cont), c(N, P+1))
cont <- predict(b, dm, predcontrib = TRUE)
expect_equal(dim(cont), c(N, P + 1))
# make sure for each row they add up to marginal predictions
max(abs(rowSums(cont) - pred)) %>% expect_lt(0.001)
# Hand-construct the 'ground truth' feature contributions:
@@ -39,43 +39,43 @@ test_that("predict feature interactions works", {
-3. * X[, 2] + 1. * X[, 2] * X[, 3], # attribute a HALF of the interaction term to feature #2
1. * X[, 2] * X[, 3] # and another HALF of the interaction term to feature #3
)
gt_cont <- cbind(gt_cont, matrix(0, nrow=N, ncol=P + 1 - 3))
gt_cont <- cbind(gt_cont, matrix(0, nrow = N, ncol = P + 1 - 3))
# These should be relatively close:
expect_lt(max(abs(cont - gt_cont)), 0.05)
# SHAP interaction contributions:
intr <- predict(b, dm, predinteraction=TRUE)
expect_equal(dim(intr), c(N, P+1, P+1))
intr <- predict(b, dm, predinteraction = TRUE)
expect_equal(dim(intr), c(N, P + 1, P + 1))
# check assigned colnames
cn <- c(letters[1:P], "BIAS")
expect_equal(dimnames(intr), list(NULL, cn, cn))
# check the symmetry
max(abs(aperm(intr, c(1,3,2)) - intr)) %>% expect_lt(0.00001)
max(abs(aperm(intr, c(1, 3, 2)) - intr)) %>% expect_lt(0.00001)
# sums WRT columns must be close to feature contributions
max(abs(apply(intr, c(1,2), sum) - cont)) %>% expect_lt(0.00001)
max(abs(apply(intr, c(1, 2), sum) - cont)) %>% expect_lt(0.00001)
# diagonal terms for features 3,4,5 must be close to zero
Reduce(max, sapply(3:P, function(i) max(abs(intr[, i, i])))) %>% expect_lt(0.05)
# BIAS must have no interactions
max(abs(intr[, 1:P, P+1])) %>% expect_lt(0.00001)
max(abs(intr[, 1:P, P + 1])) %>% expect_lt(0.00001)
# interactions other than 2 x 3 must be close to zero
intr23 <- intr
intr23[,2,3] <- 0
Reduce(max, sapply(1:P, function(i) max(abs(intr23[, i, (i+1):(P+1)])))) %>% expect_lt(0.05)
intr23[, 2, 3] <- 0
Reduce(max, sapply(1:P, function(i) max(abs(intr23[, i, (i + 1):(P + 1)])))) %>% expect_lt(0.05)
# Construct the 'ground truth' contributions of interactions directly from the linear terms:
gt_intr <- array(0, c(N, P+1, P+1))
gt_intr[,2,3] <- 1. * X[, 2] * X[, 3] # attribute a HALF of the interaction term to each symmetric element
gt_intr[,3,2] <- gt_intr[, 2, 3]
gt_intr <- array(0, c(N, P + 1, P + 1))
gt_intr[, 2, 3] <- 1. * X[, 2] * X[, 3] # attribute a HALF of the interaction term to each symmetric element
gt_intr[, 3, 2] <- gt_intr[, 2, 3]
# merge-in the diagonal based on 'ground truth' feature contributions
intr_diag = gt_cont - apply(gt_intr, c(1,2), sum)
for(j in seq_len(P)) {
gt_intr[,j,j] = intr_diag[,j]
intr_diag <- gt_cont - apply(gt_intr, c(1, 2), sum)
for (j in seq_len(P)) {
gt_intr[, j, j] <- intr_diag[, j]
}
# These should be relatively close:
expect_lt(max(abs(intr - gt_intr)), 0.1)
@@ -116,26 +116,26 @@ test_that("SHAP contribution values are not NAN", {
test_that("multiclass feature interactions work", {
dm <- xgb.DMatrix(as.matrix(iris[,-5]), label=as.numeric(iris$Species)-1)
param <- list(eta=0.1, max_depth=4, objective='multi:softprob', num_class=3)
dm <- xgb.DMatrix(as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1)
param <- list(eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3)
b <- xgb.train(param, dm, 40)
pred = predict(b, dm, outputmargin=TRUE) %>% array(c(3, 150)) %>% t
pred <- predict(b, dm, outputmargin = TRUE) %>% array(c(3, 150)) %>% t
# SHAP contributions:
cont <- predict(b, dm, predcontrib=TRUE)
cont <- predict(b, dm, predcontrib = TRUE)
expect_length(cont, 3)
# rewrap them as a 3d array
cont <- unlist(cont) %>% array(c(150, 5, 3))
# make sure for each row they add up to marginal predictions
max(abs(apply(cont, c(1,3), sum) - pred)) %>% expect_lt(0.001)
max(abs(apply(cont, c(1, 3), sum) - pred)) %>% expect_lt(0.001)
# SHAP interaction contributions:
intr <- predict(b, dm, predinteraction=TRUE)
intr <- predict(b, dm, predinteraction = TRUE)
expect_length(intr, 3)
# rewrap them as a 4d array
intr <- unlist(intr) %>% array(c(150, 5, 5, 3)) %>% aperm(c(4, 1, 2, 3)) # [grp, row, col, col]
# check the symmetry
max(abs(aperm(intr, c(1,2,4,3)) - intr)) %>% expect_lt(0.00001)
max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)) %>% expect_lt(0.00001)
# sums WRT columns must be close to feature contributions
max(abs(apply(intr, c(1,2,3), sum) - aperm(cont, c(3,1,2)))) %>% expect_lt(0.00001)
max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))) %>% expect_lt(0.00001)
})

View File

@@ -2,25 +2,25 @@ context("Code is of high quality and lint free")
test_that("Code Lint", {
skip_on_cran()
my_linters <- list(
absolute_paths_linter=lintr::absolute_paths_linter,
assignment_linter=lintr::assignment_linter,
closed_curly_linter=lintr::closed_curly_linter,
commas_linter=lintr::commas_linter,
# commented_code_linter=lintr::commented_code_linter,
infix_spaces_linter=lintr::infix_spaces_linter,
line_length_linter=lintr::line_length_linter,
no_tab_linter=lintr::no_tab_linter,
object_usage_linter=lintr::object_usage_linter,
# snake_case_linter=lintr::snake_case_linter,
# multiple_dots_linter=lintr::multiple_dots_linter,
object_length_linter=lintr::object_length_linter,
open_curly_linter=lintr::open_curly_linter,
# single_quotes_linter=lintr::single_quotes_linter,
spaces_inside_linter=lintr::spaces_inside_linter,
spaces_left_parentheses_linter=lintr::spaces_left_parentheses_linter,
trailing_blank_lines_linter=lintr::trailing_blank_lines_linter,
trailing_whitespace_linter=lintr::trailing_whitespace_linter,
true_false=lintr::T_and_F_symbol_linter
absolute_paths_linter = lintr::absolute_paths_linter,
assignment_linter = lintr::assignment_linter,
closed_curly_linter = lintr::closed_curly_linter,
commas_linter = lintr::commas_linter,
# commented_code_linter = lintr::commented_code_linter,
infix_spaces_linter = lintr::infix_spaces_linter,
line_length_linter = lintr::line_length_linter,
no_tab_linter = lintr::no_tab_linter,
object_usage_linter = lintr::object_usage_linter,
# snake_case_linter = lintr::snake_case_linter,
# multiple_dots_linter = lintr::multiple_dots_linter,
object_length_linter = lintr::object_length_linter,
open_curly_linter = lintr::open_curly_linter,
# single_quotes_linter = lintr::single_quotes_linter,
spaces_inside_linter = lintr::spaces_inside_linter,
spaces_left_parentheses_linter = lintr::spaces_left_parentheses_linter,
trailing_blank_lines_linter = lintr::trailing_blank_lines_linter,
trailing_whitespace_linter = lintr::trailing_whitespace_linter,
true_false = lintr::T_and_F_symbol_linter
)
lintr::expect_lint_free(linters=my_linters) # uncomment this if you want to check code quality
lintr::expect_lint_free(linters = my_linters) # uncomment this if you want to check code quality
})

View File

@@ -3,22 +3,21 @@ require(xgboost)
context("monotone constraints")
set.seed(1024)
x = rnorm(1000, 10)
y = -1*x + rnorm(1000, 0.001) + 3*sin(x)
train = matrix(x, ncol = 1)
x <- rnorm(1000, 10)
y <- -1 * x + rnorm(1000, 0.001) + 3 * sin(x)
train <- matrix(x, ncol = 1)
test_that("monotone constraints for regression", {
bst = xgboost(data = train, label = y, max_depth = 2,
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
monotone_constraints = -1)
pred = predict(bst, train)
ind = order(train[,1])
pred.ord = pred[ind]
expect_true({
!any(diff(pred.ord) > 0)
}, "Monotone Contraint Satisfied")
bst <- xgboost(data = train, label = y, max_depth = 2,
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
monotone_constraints = -1)
pred <- predict(bst, train)
ind <- order(train[, 1])
pred.ord <- pred[ind]
expect_true({
!any(diff(pred.ord) > 0)
}, "Monotone Contraint Satisfied")
})

View File

@@ -2,8 +2,8 @@ context('Test model params and call are exposed to R')
require(xgboost)
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)

View File

@@ -5,10 +5,10 @@ set.seed(1994)
test_that("poisson regression works", {
data(mtcars)
bst <- xgboost(data = as.matrix(mtcars[,-11]), label = mtcars[,11],
objective = 'count:poisson', nrounds=10, verbose=0)
bst <- xgboost(data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
objective = 'count:poisson', nrounds = 10, verbose = 0)
expect_equal(class(bst), "xgb.Booster")
pred <- predict(bst, as.matrix(mtcars[, -11]))
expect_equal(length(pred), 32)
expect_lt(sqrt(mean( (pred - mtcars[,11])^2 )), 1.2)
expect_lt(sqrt(mean((pred - mtcars[, 11])^2)), 1.2)
})

View File

@@ -9,23 +9,23 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# Disable flaky tests for 32-bit Windows.
# See https://github.com/dmlc/xgboost/issues/3720
win32_flag = .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
test_that("updating the model works", {
watchlist = list(train = dtrain, test = dtest)
watchlist <- list(train = dtrain, test = dtest)
# no-subsampling
p1 <- list(objective = "binary:logistic", max_depth = 2, eta = 0.05, nthread = 2)
set.seed(11)
bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0)
tr1 <- xgb.model.dt.tree(model = bst1)
# with subsampling
p2 <- modifyList(p1, list(subsample = 0.1))
set.seed(11)
bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0)
tr2 <- xgb.model.dt.tree(model = bst2)
# the same no-subsampling boosting with an extra 'refresh' updater:
p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
set.seed(11)
@@ -57,7 +57,7 @@ test_that("updating the model works", {
# all should be the same when no subsampling
expect_equal(bst1$evaluation_log, bst1u$evaluation_log)
expect_equal(tr1, tr1u, tolerance = 0.00001, check.attributes = FALSE)
# process type 'update' for model with subsampling, refreshing only the tree stats from training data:
p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst2)
@@ -72,7 +72,7 @@ test_that("updating the model works", {
if (!win32_flag) {
expect_equal(tr2r, tr2u, tolerance = 0.00001, check.attributes = FALSE)
}
# process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:
p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
@@ -93,12 +93,12 @@ test_that("updating works for multiclass & multitree", {
set.seed(121)
bst0 <- xgb.train(p0, dtr, 5, watchlist, verbose = 0)
tr0 <- xgb.model.dt.tree(model = bst0)
# run update process for an original model with subsampling
p0u <- modifyList(p0, list(process_type='update', updater='refresh', refresh_leaf=FALSE))
p0u <- modifyList(p0, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
bst0u <- xgb.train(p0u, dtr, nrounds = bst0$niter, watchlist, xgb_model = bst0, verbose = 0)
tr0u <- xgb.model.dt.tree(model = bst0u)
# should be the same evaluation but different gains and larger cover
expect_equal(bst0$evaluation_log, bst0u$evaluation_log)
expect_equal(tr0[Feature == 'Leaf']$Quality, tr0u[Feature == 'Leaf']$Quality)