[R] Replace xgboost() with xgb.train() in most tests and examples (#9941)

This commit is contained in:
david-cortes
2024-01-02 14:20:01 +01:00
committed by GitHub
parent 32cbab1cc0
commit 9e33a10202
27 changed files with 156 additions and 150 deletions

View File

@@ -16,10 +16,11 @@ n_threads <- 1
test_that("train and predict binary classification", {
nrounds <- 2
expect_output(
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
bst <- xgb.train(
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = nrounds,
objective = "binary:logistic", eval_metric = "error"
objective = "binary:logistic", eval_metric = "error",
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error"
)
@@ -104,9 +105,8 @@ test_that("dart prediction works", {
rnorm(100)
set.seed(1994)
booster_by_xgboost <- xgboost(
data = d,
label = y,
booster_by_xgboost <- xgb.train(
data = xgb.DMatrix(d, label = y),
max_depth = 2,
booster = "dart",
rate_drop = 0.5,
@@ -151,10 +151,11 @@ test_that("train and predict softprob", {
lb <- as.numeric(iris$Species) - 1
set.seed(11)
expect_output(
bst <- xgboost(
data = as.matrix(iris[, -5]), label = lb,
bst <- xgb.train(
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softprob", num_class = 3, eval_metric = "merror"
objective = "multi:softprob", num_class = 3, eval_metric = "merror",
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
),
"train-merror"
)
@@ -201,10 +202,11 @@ test_that("train and predict softmax", {
lb <- as.numeric(iris$Species) - 1
set.seed(11)
expect_output(
bst <- xgboost(
data = as.matrix(iris[, -5]), label = lb,
bst <- xgb.train(
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softmax", num_class = 3, eval_metric = "merror"
objective = "multi:softmax", num_class = 3, eval_metric = "merror",
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
),
"train-merror"
)
@@ -222,11 +224,12 @@ test_that("train and predict RF", {
set.seed(11)
lb <- train$label
# single iteration
bst <- xgboost(
data = train$data, label = lb, max_depth = 5,
bst <- xgb.train(
data = xgb.DMatrix(train$data, label = lb), max_depth = 5,
nthread = n_threads,
nrounds = 1, objective = "binary:logistic", eval_metric = "error",
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1,
watchlist = list(train = xgb.DMatrix(train$data, label = lb))
)
expect_equal(bst$niter, 1)
expect_equal(xgb.ntree(bst), 20)
@@ -248,12 +251,13 @@ test_that("train and predict RF with softprob", {
lb <- as.numeric(iris$Species) - 1
nrounds <- 15
set.seed(11)
bst <- xgboost(
data = as.matrix(iris[, -5]), label = lb,
bst <- xgb.train(
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
max_depth = 3, eta = 0.9, nthread = n_threads, nrounds = nrounds,
objective = "multi:softprob", eval_metric = "merror",
num_class = 3, verbose = 0,
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5,
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
)
expect_equal(bst$niter, 15)
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
@@ -271,10 +275,11 @@ test_that("train and predict RF with softprob", {
test_that("use of multiple eval metrics works", {
expect_output(
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
bst <- xgb.train(
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss",
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error.*train-auc.*train-logloss"
)
@@ -282,10 +287,11 @@ test_that("use of multiple eval metrics works", {
expect_equal(dim(bst$evaluation_log), c(2, 4))
expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
expect_output(
bst2 <- xgboost(
data = train$data, label = train$label, max_depth = 2,
bst2 <- xgb.train(
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = list("error", "auc", "logloss")
eval_metric = list("error", "auc", "logloss"),
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error.*train-auc.*train-logloss"
)
@@ -361,7 +367,7 @@ test_that("xgb.cv works", {
expect_is(cv, "xgb.cv.synchronous")
expect_false(is.null(cv$evaluation_log))
expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)
expect_lt(cv$evaluation_log[, min(test_error_std)], 0.008)
expect_lt(cv$evaluation_log[, min(test_error_std)], 0.0085)
expect_equal(cv$niter, 2)
expect_false(is.null(cv$folds) && is.list(cv$folds))
expect_length(cv$folds, 5)
@@ -391,8 +397,8 @@ test_that("xgb.cv works with stratified folds", {
test_that("train and predict with non-strict classes", {
# standard dense matrix input
train_dense <- as.matrix(train$data)
bst <- xgboost(
data = train_dense, label = train$label, max_depth = 2,
bst <- xgb.train(
data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = 0
)
@@ -402,8 +408,8 @@ test_that("train and predict with non-strict classes", {
class(train_dense) <- "shmatrix"
expect_true(is.matrix(train_dense))
expect_error(
bst <- xgboost(
data = train_dense, label = train$label, max_depth = 2,
bst <- xgb.train(
data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = 0
),
@@ -416,8 +422,8 @@ test_that("train and predict with non-strict classes", {
class(train_dense) <- c("pphmatrix", "shmatrix")
expect_true(is.matrix(train_dense))
expect_error(
bst <- xgboost(
data = train_dense, label = train$label, max_depth = 2,
bst <- xgb.train(
data = xgb.DMatrix(train_dense, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
verbose = 0
),
@@ -480,8 +486,8 @@ test_that("colsample_bytree works", {
})
test_that("Configuration works", {
bst <- xgboost(
data = train$data, label = train$label, max_depth = 2,
bst <- xgb.train(
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
)
@@ -521,8 +527,8 @@ test_that("strict_shape works", {
y <- as.numeric(iris$Species) - 1
X <- as.matrix(iris[, -5])
bst <- xgboost(
data = X, label = y,
bst <- xgb.train(
data = xgb.DMatrix(X, label = y),
max_depth = 2, nrounds = n_rounds, nthread = n_threads,
objective = "multi:softprob", num_class = 3, eval_metric = "merror"
)
@@ -536,8 +542,8 @@ test_that("strict_shape works", {
X <- agaricus.train$data
y <- agaricus.train$label
bst <- xgboost(
data = X, label = y, max_depth = 2, nthread = n_threads,
bst <- xgb.train(
data = xgb.DMatrix(X, label = y), max_depth = 2, nthread = n_threads,
nrounds = n_rounds, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss"
)
@@ -555,8 +561,8 @@ test_that("'predict' accepts CSR data", {
x_csc <- as(X[1L, , drop = FALSE], "CsparseMatrix")
x_csr <- as(x_csc, "RsparseMatrix")
x_spv <- as(x_csc, "sparseVector")
bst <- xgboost(
data = X, label = y, objective = "binary:logistic",
bst <- xgb.train(
data = xgb.DMatrix(X, label = y), objective = "binary:logistic",
nrounds = 5L, verbose = FALSE, nthread = n_threads,
)
p_csc <- predict(bst, x_csc)

View File

@@ -265,14 +265,14 @@ test_that("early stopping works with titanic", {
dtx <- model.matrix(~ 0 + ., data = titanic[, c("Pclass", "Sex")])
dty <- titanic$Survived
xgboost::xgboost(
data = dtx,
label = dty,
xgboost::xgb.train(
data = xgb.DMatrix(dtx, label = dty),
objective = "binary:logistic",
eval_metric = "auc",
nrounds = 100,
early_stopping_rounds = 3,
nthread = n_threads
nthread = n_threads,
watchlist = list(train = xgb.DMatrix(dtx, label = dty))
)
expect_true(TRUE) # should not crash

View File

@@ -6,8 +6,8 @@ test_that("train and prediction when gctorture is on", {
train <- agaricus.train
test <- agaricus.test
gctorture(TRUE)
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max.depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
pred <- predict(bst, test$data)
gctorture(FALSE)
expect_length(pred, length(test$label))

View File

@@ -25,15 +25,15 @@ if (isTRUE(VCD_AVAILABLE)) {
label <- df[, ifelse(Improved == "Marked", 1, 0)]
# binary
bst.Tree <- xgboost(data = sparse_matrix, label = label, max_depth = 9,
eta = 1, nthread = 2, nrounds = nrounds, verbose = 0,
objective = "binary:logistic", booster = "gbtree",
base_score = 0.5)
bst.Tree <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), max_depth = 9,
eta = 1, nthread = 2, nrounds = nrounds, verbose = 0,
objective = "binary:logistic", booster = "gbtree",
base_score = 0.5)
bst.GLM <- xgboost(data = sparse_matrix, label = label,
eta = 1, nthread = 1, nrounds = nrounds, verbose = 0,
objective = "binary:logistic", booster = "gblinear",
base_score = 0.5)
bst.GLM <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label),
eta = 1, nthread = 1, nrounds = nrounds, verbose = 0,
objective = "binary:logistic", booster = "gblinear",
base_score = 0.5)
feature.names <- colnames(sparse_matrix)
}
@@ -41,13 +41,13 @@ if (isTRUE(VCD_AVAILABLE)) {
# multiclass
# Shared multiclass fixtures: labels are the integer codes of iris$Species
# shifted to start at 0, and the class count is fixed at 3.
mlabel <- as.numeric(iris$Species) - 1
nclass <- 3
# NOTE(review): each booster below is assigned twice with the same training
# parameters, first through xgboost(data, label, ...) and then through
# xgb.train() on an xgb.DMatrix that carries the label. The pairs look like the
# before/after sides of a diff hunk whose +/- markers were lost, so presumably
# only the xgb.train() form is meant to remain -- confirm against the real file.
# Booster fitted without a `booster` argument, multi:softprob objective.
mbst.Tree <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0,
max_depth = 3, eta = 0.5, nthread = 2, nrounds = nrounds,
objective = "multi:softprob", num_class = nclass, base_score = 0)
mbst.Tree <- xgb.train(data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel), verbose = 0,
max_depth = 3, eta = 0.5, nthread = 2, nrounds = nrounds,
objective = "multi:softprob", num_class = nclass, base_score = 0)
# Booster fitted with booster = "gblinear", multi:softprob objective.
mbst.GLM <- xgboost(data = as.matrix(iris[, -5]), label = mlabel, verbose = 0,
booster = "gblinear", eta = 0.1, nthread = 1, nrounds = nrounds,
objective = "multi:softprob", num_class = nclass, base_score = 0)
mbst.GLM <- xgb.train(data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel), verbose = 0,
booster = "gblinear", eta = 0.1, nthread = 1, nrounds = nrounds,
objective = "multi:softprob", num_class = nclass, base_score = 0)
test_that("xgb.dump works", {
@@ -71,8 +71,9 @@ test_that("xgb.dump works for gblinear", {
expect_length(xgb.dump(bst.GLM), 14)
# also make sure that it works properly for a sparse model where some coefficients
# are 0 from setting large L1 regularization:
bst.GLM.sp <- xgboost(data = sparse_matrix, label = label, eta = 1, nthread = 2, nrounds = 1,
alpha = 2, objective = "binary:logistic", booster = "gblinear")
bst.GLM.sp <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), eta = 1,
nthread = 2, nrounds = 1,
alpha = 2, objective = "binary:logistic", booster = "gblinear")
d.sp <- xgb.dump(bst.GLM.sp)
expect_length(d.sp, 14)
expect_gt(sum(d.sp == "0"), 0)
@@ -168,7 +169,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
nrounds <- 30
for (booster in list("gbtree", "dart")) {
fit <- xgboost(
fit <- xgb.train(
params = c(
list(
nthread = 2,
@@ -177,8 +178,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
eval_metric = "rmse"),
if (booster == "dart")
list(rate_drop = .01, one_drop = TRUE)),
data = d,
label = y,
data = xgb.DMatrix(d, label = y),
nrounds = nrounds)
pr <- function(...) {
@@ -360,9 +360,8 @@ test_that("xgb.importance works with and without feature names", {
expect_equal(importance_from_dump(), importance, tolerance = 1e-6)
## decision stump
m <- xgboost::xgboost(
data = as.matrix(data.frame(x = c(0, 1))),
label = c(1, 2),
m <- xgboost::xgb.train(
data = xgb.DMatrix(as.matrix(data.frame(x = c(0, 1))), label = c(1, 2)),
nrounds = 1,
base_score = 0.5,
nthread = 2
@@ -393,9 +392,9 @@ test_that("xgb.importance works with GLM model", {
test_that("xgb.model.dt.tree and xgb.importance work with a single split model", {
.skip_if_vcd_not_available()
bst1 <- xgboost(data = sparse_matrix, label = label, max_depth = 1,
eta = 1, nthread = 2, nrounds = 1, verbose = 0,
objective = "binary:logistic")
bst1 <- xgb.train(data = xgb.DMatrix(sparse_matrix, label = label), max_depth = 1,
eta = 1, nthread = 2, nrounds = 1, verbose = 0,
objective = "binary:logistic")
expect_error(dt <- xgb.model.dt.tree(model = bst1), regexp = NA) # no error
expect_equal(nrow(dt), 3)
expect_error(imp <- xgb.importance(model = bst1), regexp = NA) # no error

View File

@@ -13,9 +13,9 @@ train <- matrix(c(x1, x2, x3), ncol = 3)
test_that("interaction constraints for regression", {
# Fit a model that only allows interaction between x1 and x2
bst <- xgboost(data = train, label = y, max_depth = 3,
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
interaction_constraints = list(c(0, 1)))
bst <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 3,
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
interaction_constraints = list(c(0, 1)))
# Set all observations to have the same x3 values then increment
# by the same amount

View File

@@ -98,15 +98,14 @@ test_that("SHAP contribution values are not NAN", {
ivs <- c("x1", "x2")
fit <- xgboost(
fit <- xgb.train(
verbose = 0,
params = list(
objective = "reg:squarederror",
eval_metric = "rmse",
nthread = n_threads
),
data = as.matrix(subset(d, fold == 2)[, ivs]),
label = subset(d, fold == 2)$y,
data = xgb.DMatrix(as.matrix(subset(d, fold == 2)[, ivs]), label = subset(d, fold == 2)$y),
nrounds = 3
)
@@ -169,9 +168,8 @@ test_that("multiclass feature interactions work", {
test_that("SHAP single sample works", {
train <- agaricus.train
test <- agaricus.test
booster <- xgboost(
data = train$data,
label = train$label,
booster <- xgb.train(
data = xgb.DMatrix(train$data, label = train$label),
max_depth = 2,
nrounds = 4,
objective = "binary:logistic",

View File

@@ -7,8 +7,8 @@ test <- agaricus.test
test_that("load/save raw works", {
nrounds <- 8
booster <- xgboost(
data = train$data, label = train$label,
booster <- xgb.train(
data = xgb.DMatrix(train$data, label = train$label),
nrounds = nrounds, objective = "binary:logistic",
nthread = 2
)

View File

@@ -7,9 +7,9 @@ train <- matrix(x, ncol = 1)
test_that("monotone constraints for regression", {
bst <- xgboost(data = train, label = y, max_depth = 2,
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
monotone_constraints = -1)
bst <- xgb.train(data = xgb.DMatrix(train, label = y), max_depth = 2,
eta = 0.1, nthread = 2, nrounds = 100, verbose = 0,
monotone_constraints = -1)
pred <- predict(bst, train)

View File

@@ -10,13 +10,13 @@ dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = 2
)
# Module-level booster trained on `dtrain`; the test that follows inspects
# `bst$call`.
# NOTE(review): `bst` is assigned twice with identical arguments, once via
# xgboost() and once via xgb.train(). This looks like the before/after sides of
# a diff hunk whose +/- markers were lost, so presumably only the xgb.train()
# call is meant to remain -- confirm against the real file.
bst <- xgboost(data = dtrain,
max_depth = 2,
eta = 1,
nrounds = 10,
nthread = 1,
verbose = 0,
objective = "binary:logistic")
bst <- xgb.train(data = dtrain,
max_depth = 2,
eta = 1,
nrounds = 10,
nthread = 1,
verbose = 0,
objective = "binary:logistic")
test_that("call is exposed to R", {
expect_false(is.null(bst$call))

View File

@@ -4,8 +4,8 @@ set.seed(1994)
test_that("Poisson regression works", {
data(mtcars)
bst <- xgboost(
data = as.matrix(mtcars[, -11]), label = mtcars[, 11],
bst <- xgb.train(
data = xgb.DMatrix(as.matrix(mtcars[, -11]), label = mtcars[, 11]),
objective = 'count:poisson', nrounds = 10, verbose = 0, nthread = 2
)
expect_equal(class(bst), "xgb.Booster")

View File

@@ -8,9 +8,9 @@ set.seed(1994)
test_that("Can save and load models with Unicode paths", {
nrounds <- 2
bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic",
eval_metric = "error")
bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic",
eval_metric = "error")
tmpdir <- tempdir()
lapply(c("모델.json", "がうる・ぐら.json", "类继承.ubj"), function(x) {
path <- file.path(tmpdir, x)