[R] On-demand serialization + standardization of attributes (#9924)
--------- Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
This commit is contained in:
@@ -3,7 +3,6 @@
|
||||
## inconsistent is found.
|
||||
pkgs <- c(
|
||||
## CI
|
||||
"caret",
|
||||
"pkgbuild",
|
||||
"roxygen2",
|
||||
"XML",
|
||||
|
||||
@@ -25,10 +25,10 @@ test_that("train and predict binary classification", {
|
||||
"train-error"
|
||||
)
|
||||
expect_equal(class(bst), "xgb.Booster")
|
||||
expect_equal(bst$niter, nrounds)
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
expect_equal(nrow(bst$evaluation_log), nrounds)
|
||||
expect_lt(bst$evaluation_log[, min(train_error)], 0.03)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), nrounds)
|
||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||
expect_equal(nrow(attributes(bst)$evaluation_log), nrounds)
|
||||
expect_lt(attributes(bst)$evaluation_log[, min(train_error)], 0.03)
|
||||
|
||||
pred <- predict(bst, test$data)
|
||||
expect_length(pred, 1611)
|
||||
@@ -36,7 +36,7 @@ test_that("train and predict binary classification", {
|
||||
pred1 <- predict(bst, train$data, ntreelimit = 1)
|
||||
expect_length(pred1, 6513)
|
||||
err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)
|
||||
err_log <- bst$evaluation_log[1, train_error]
|
||||
err_log <- attributes(bst)$evaluation_log[1, train_error]
|
||||
expect_lt(abs(err_pred1 - err_log), 10e-6)
|
||||
|
||||
pred2 <- predict(bst, train$data, iterationrange = c(1, 2))
|
||||
@@ -160,9 +160,9 @@ test_that("train and predict softprob", {
|
||||
),
|
||||
"train-merror"
|
||||
)
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(bst$niter * 3, xgb.ntree(bst))
|
||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||
expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
|
||||
pred <- predict(bst, as.matrix(iris[, -5]))
|
||||
expect_length(pred, nrow(iris) * 3)
|
||||
# row sums add up to total probability of 1:
|
||||
@@ -172,12 +172,12 @@ test_that("train and predict softprob", {
|
||||
expect_equal(as.numeric(t(mpred)), pred)
|
||||
pred_labels <- max.col(mpred) - 1
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(bst$evaluation_log[5, train_merror], err, tolerance = 5e-6)
|
||||
expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6)
|
||||
# manually calculate error at the 1st iteration:
|
||||
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 1)
|
||||
pred_labels <- max.col(mpred) - 1
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(bst$evaluation_log[1, train_merror], err, tolerance = 5e-6)
|
||||
expect_equal(attributes(bst)$evaluation_log[1, train_merror], err, tolerance = 5e-6)
|
||||
|
||||
mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 2))
|
||||
expect_equal(mpred, mpred1)
|
||||
@@ -211,14 +211,14 @@ test_that("train and predict softmax", {
|
||||
),
|
||||
"train-merror"
|
||||
)
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(bst$niter * 3, xgb.ntree(bst))
|
||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||
expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
|
||||
|
||||
pred <- predict(bst, as.matrix(iris[, -5]))
|
||||
expect_length(pred, nrow(iris))
|
||||
err <- sum(pred != lb) / length(lb)
|
||||
expect_equal(bst$evaluation_log[5, train_merror], err, tolerance = 5e-6)
|
||||
expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6)
|
||||
})
|
||||
|
||||
test_that("train and predict RF", {
|
||||
@@ -232,12 +232,12 @@ test_that("train and predict RF", {
|
||||
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1,
|
||||
watchlist = list(train = xgb.DMatrix(train$data, label = lb))
|
||||
)
|
||||
expect_equal(bst$niter, 1)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 1)
|
||||
expect_equal(xgb.ntree(bst), 20)
|
||||
|
||||
pred <- predict(bst, train$data)
|
||||
pred_err <- sum((pred > 0.5) != lb) / length(lb)
|
||||
expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
|
||||
expect_lt(abs(attributes(bst)$evaluation_log[1, train_error] - pred_err), 10e-6)
|
||||
# expect_lt(pred_err, 0.03)
|
||||
|
||||
pred <- predict(bst, train$data, ntreelimit = 20)
|
||||
@@ -260,18 +260,18 @@ test_that("train and predict RF with softprob", {
|
||||
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5,
|
||||
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
|
||||
)
|
||||
expect_equal(bst$niter, 15)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 15)
|
||||
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
|
||||
# predict for all iterations:
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
|
||||
expect_equal(dim(pred), c(nrow(iris), 3))
|
||||
pred_labels <- max.col(pred) - 1
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(bst$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
|
||||
expect_equal(attributes(bst)$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
|
||||
# predict for 7 iterations and adjust for 4 parallel trees per iteration
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 7 * 4)
|
||||
err <- sum((max.col(pred) - 1) != lb) / length(lb)
|
||||
expect_equal(bst$evaluation_log[7, train_merror], err, tolerance = 5e-6)
|
||||
expect_equal(attributes(bst)$evaluation_log[7, train_merror], err, tolerance = 5e-6)
|
||||
})
|
||||
|
||||
test_that("use of multiple eval metrics works", {
|
||||
@@ -284,9 +284,9 @@ test_that("use of multiple eval metrics works", {
|
||||
),
|
||||
"train-error.*train-auc.*train-logloss"
|
||||
)
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
expect_equal(dim(bst$evaluation_log), c(2, 4))
|
||||
expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
|
||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||
expect_equal(dim(attributes(bst)$evaluation_log), c(2, 4))
|
||||
expect_equal(colnames(attributes(bst)$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
|
||||
expect_output(
|
||||
bst2 <- xgb.train(
|
||||
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
|
||||
@@ -296,9 +296,9 @@ test_that("use of multiple eval metrics works", {
|
||||
),
|
||||
"train-error.*train-auc.*train-logloss"
|
||||
)
|
||||
expect_false(is.null(bst2$evaluation_log))
|
||||
expect_equal(dim(bst2$evaluation_log), c(2, 4))
|
||||
expect_equal(colnames(bst2$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
|
||||
expect_false(is.null(attributes(bst2)$evaluation_log))
|
||||
expect_equal(dim(attributes(bst2)$evaluation_log), c(2, 4))
|
||||
expect_equal(colnames(attributes(bst2)$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss"))
|
||||
})
|
||||
|
||||
|
||||
@@ -318,41 +318,25 @@ test_that("training continuation works", {
|
||||
# continue for two more:
|
||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
|
||||
if (!windows_flag && !solaris_flag) {
|
||||
expect_equal(bst$raw, bst2$raw)
|
||||
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
|
||||
}
|
||||
expect_false(is.null(bst2$evaluation_log))
|
||||
expect_equal(dim(bst2$evaluation_log), c(4, 2))
|
||||
expect_equal(bst2$evaluation_log, bst$evaluation_log)
|
||||
expect_false(is.null(attributes(bst2)$evaluation_log))
|
||||
expect_equal(dim(attributes(bst2)$evaluation_log), c(4, 2))
|
||||
expect_equal(attributes(bst2)$evaluation_log, attributes(bst)$evaluation_log)
|
||||
# test continuing from raw model data
|
||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1$raw)
|
||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
|
||||
if (!windows_flag && !solaris_flag) {
|
||||
expect_equal(bst$raw, bst2$raw)
|
||||
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
|
||||
}
|
||||
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
||||
expect_equal(dim(attributes(bst2)$evaluation_log), c(2, 2))
|
||||
# test continuing from a model in file
|
||||
fname <- file.path(tempdir(), "xgboost.json")
|
||||
xgb.save(bst1, fname)
|
||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = fname)
|
||||
if (!windows_flag && !solaris_flag) {
|
||||
expect_equal(bst$raw, bst2$raw)
|
||||
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
|
||||
}
|
||||
expect_equal(dim(bst2$evaluation_log), c(2, 2))
|
||||
})
|
||||
|
||||
test_that("model serialization works", {
|
||||
out_path <- file.path(tempdir(), "model_serialization")
|
||||
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
|
||||
watchlist <- list(train = dtrain)
|
||||
param <- list(objective = "binary:logistic", nthread = n_threads)
|
||||
booster <- xgb.train(param, dtrain, nrounds = 4, watchlist)
|
||||
raw <- xgb.serialize(booster)
|
||||
saveRDS(raw, out_path)
|
||||
raw <- readRDS(out_path)
|
||||
|
||||
loaded <- xgb.unserialize(raw)
|
||||
raw_from_loaded <- xgb.serialize(loaded)
|
||||
expect_equal(raw, raw_from_loaded)
|
||||
file.remove(out_path)
|
||||
expect_equal(dim(attributes(bst2)$evaluation_log), c(2, 2))
|
||||
})
|
||||
|
||||
test_that("xgb.cv works", {
|
||||
@@ -455,8 +439,8 @@ test_that("max_delta_step works", {
|
||||
# model with restricted max_delta_step
|
||||
bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1)
|
||||
# the no-restriction model is expected to have consistently lower loss during the initial iterations
|
||||
expect_true(all(bst1$evaluation_log$train_logloss < bst2$evaluation_log$train_logloss))
|
||||
expect_lt(mean(bst1$evaluation_log$train_logloss) / mean(bst2$evaluation_log$train_logloss), 0.8)
|
||||
expect_true(all(attributes(bst1)$evaluation_log$train_logloss < attributes(bst2)$evaluation_log$train_logloss))
|
||||
expect_lt(mean(attributes(bst1)$evaluation_log$train_logloss) / mean(attributes(bst2)$evaluation_log$train_logloss), 0.8)
|
||||
})
|
||||
|
||||
test_that("colsample_bytree works", {
|
||||
@@ -675,3 +659,131 @@ test_that("Can use ranking objectives with either 'qid' or 'group'", {
|
||||
pred_gr <- predict(model_gr, x)
|
||||
expect_equal(pred_qid, pred_gr)
|
||||
})
|
||||
|
||||
test_that("Coefficients from gblinear have the expected shape and names", {
|
||||
# Single-column coefficients
|
||||
data(mtcars)
|
||||
y <- mtcars$mpg
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
mm <- model.matrix(~., data = mtcars[, -1])
|
||||
dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
model <- xgb.train(
|
||||
data = dm,
|
||||
params = list(
|
||||
booster = "gblinear",
|
||||
nthread = 1
|
||||
),
|
||||
nrounds = 3
|
||||
)
|
||||
coefs <- coef(model)
|
||||
expect_equal(length(coefs), ncol(x) + 1)
|
||||
expect_equal(names(coefs), c("(Intercept)", colnames(x)))
|
||||
pred_auto <- predict(model, x)
|
||||
pred_manual <- as.numeric(mm %*% coefs)
|
||||
expect_equal(pred_manual, pred_auto, tolerance = 1e-5)
|
||||
|
||||
# Multi-column coefficients
|
||||
data(iris)
|
||||
y <- as.numeric(iris$Species) - 1
|
||||
x <- as.matrix(iris[, -5])
|
||||
dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
mm <- model.matrix(~., data = iris[, -5])
|
||||
model <- xgb.train(
|
||||
data = dm,
|
||||
params = list(
|
||||
booster = "gblinear",
|
||||
objective = "multi:softprob",
|
||||
num_class = 3,
|
||||
nthread = 1
|
||||
),
|
||||
nrounds = 3
|
||||
)
|
||||
coefs <- coef(model)
|
||||
expect_equal(nrow(coefs), ncol(x) + 1)
|
||||
expect_equal(ncol(coefs), 3)
|
||||
expect_equal(row.names(coefs), c("(Intercept)", colnames(x)))
|
||||
pred_auto <- predict(model, x, outputmargin = TRUE, reshape = TRUE)
|
||||
pred_manual <- unname(mm %*% coefs)
|
||||
expect_equal(pred_manual, pred_auto, tolerance = 1e-7)
|
||||
})
|
||||
|
||||
test_that("Deep copies work as expected", {
|
||||
data(mtcars)
|
||||
y <- mtcars$mpg
|
||||
x <- mtcars[, -1]
|
||||
dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
model <- xgb.train(
|
||||
data = dm,
|
||||
params = list(nthread = 1),
|
||||
nrounds = 3
|
||||
)
|
||||
|
||||
xgb.attr(model, "my_attr") <- 100
|
||||
model_shallow_copy <- model
|
||||
xgb.attr(model_shallow_copy, "my_attr") <- 333
|
||||
attr_orig <- xgb.attr(model, "my_attr")
|
||||
attr_shallow <- xgb.attr(model_shallow_copy, "my_attr")
|
||||
expect_equal(attr_orig, attr_shallow)
|
||||
|
||||
model_deep_copy <- xgb.copy.Booster(model)
|
||||
xgb.attr(model_deep_copy, "my_attr") <- 444
|
||||
attr_orig <- xgb.attr(model, "my_attr")
|
||||
attr_deep <- xgb.attr(model_deep_copy, "my_attr")
|
||||
expect_false(attr_orig == attr_deep)
|
||||
})
|
||||
|
||||
test_that("Pointer comparison works as expected", {
|
||||
library(xgboost)
|
||||
y <- mtcars$mpg
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
model <- xgb.train(
|
||||
params = list(nthread = 1),
|
||||
data = xgb.DMatrix(x, label = y, nthread = 1),
|
||||
nrounds = 3
|
||||
)
|
||||
|
||||
model_shallow_copy <- model
|
||||
expect_true(xgb.is.same.Booster(model, model_shallow_copy))
|
||||
|
||||
model_deep_copy <- xgb.copy.Booster(model)
|
||||
expect_false(xgb.is.same.Booster(model, model_deep_copy))
|
||||
|
||||
xgb.attr(model_shallow_copy, "my_attr") <- 111
|
||||
expect_equal(xgb.attr(model, "my_attr"), "111")
|
||||
expect_null(xgb.attr(model_deep_copy, "my_attr"))
|
||||
})
|
||||
|
||||
test_that("DMatrix field are set to booster when training", {
|
||||
set.seed(123)
|
||||
y <- rnorm(100)
|
||||
x <- matrix(rnorm(100 * 3), nrow = 100)
|
||||
x[, 2] <- abs(as.integer(x[, 2]))
|
||||
|
||||
dm_unnamed <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
dm_feature_names <- xgb.DMatrix(x, label = y, feature_names = c("a", "b", "c"), nthread = 1)
|
||||
dm_feature_types <- xgb.DMatrix(x, label = y)
|
||||
setinfo(dm_feature_types, "feature_type", c("q", "c", "q"))
|
||||
dm_both <- xgb.DMatrix(x, label = y, feature_names = c("a", "b", "c"), nthread = 1)
|
||||
setinfo(dm_both, "feature_type", c("q", "c", "q"))
|
||||
|
||||
params <- list(nthread = 1)
|
||||
model_unnamed <- xgb.train(data = dm_unnamed, params = params, nrounds = 3)
|
||||
model_feature_names <- xgb.train(data = dm_feature_names, params = params, nrounds = 3)
|
||||
model_feature_types <- xgb.train(data = dm_feature_types, params = params, nrounds = 3)
|
||||
model_both <- xgb.train(data = dm_both, params = params, nrounds = 3)
|
||||
|
||||
expect_null(getinfo(model_unnamed, "feature_name"))
|
||||
expect_equal(getinfo(model_feature_names, "feature_name"), c("a", "b", "c"))
|
||||
expect_null(getinfo(model_feature_types, "feature_name"))
|
||||
expect_equal(getinfo(model_both, "feature_name"), c("a", "b", "c"))
|
||||
|
||||
expect_null(variable.names(model_unnamed))
|
||||
expect_equal(variable.names(model_feature_names), c("a", "b", "c"))
|
||||
expect_null(variable.names(model_feature_types))
|
||||
expect_equal(variable.names(model_both), c("a", "b", "c"))
|
||||
|
||||
expect_null(getinfo(model_unnamed, "feature_type"))
|
||||
expect_null(getinfo(model_feature_names, "feature_type"))
|
||||
expect_equal(getinfo(model_feature_types, "feature_type"), c("q", "c", "q"))
|
||||
expect_equal(getinfo(model_both, "feature_type"), c("q", "c", "q"))
|
||||
})
|
||||
|
||||
@@ -111,9 +111,9 @@ test_that("can store evaluation_log without printing", {
|
||||
expect_silent(
|
||||
bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1, verbose = 0)
|
||||
)
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
expect_false(is.null(bst$evaluation_log$train_error))
|
||||
expect_lt(bst$evaluation_log[, min(train_error)], 0.2)
|
||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||
expect_false(is.null(attributes(bst)$evaluation_log$train_error))
|
||||
expect_lt(attributes(bst)$evaluation_log[, min(train_error)], 0.2)
|
||||
})
|
||||
|
||||
test_that("cb.reset.parameters works as expected", {
|
||||
@@ -121,34 +121,34 @@ test_that("cb.reset.parameters works as expected", {
|
||||
# fixed eta
|
||||
set.seed(111)
|
||||
bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9, verbose = 0)
|
||||
expect_false(is.null(bst0$evaluation_log))
|
||||
expect_false(is.null(bst0$evaluation_log$train_error))
|
||||
expect_false(is.null(attributes(bst0)$evaluation_log))
|
||||
expect_false(is.null(attributes(bst0)$evaluation_log$train_error))
|
||||
|
||||
# same eta but re-set as a vector parameter in the callback
|
||||
set.seed(111)
|
||||
my_par <- list(eta = c(0.9, 0.9))
|
||||
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
||||
callbacks = list(cb.reset.parameters(my_par)))
|
||||
expect_false(is.null(bst1$evaluation_log$train_error))
|
||||
expect_equal(bst0$evaluation_log$train_error,
|
||||
bst1$evaluation_log$train_error)
|
||||
expect_false(is.null(attributes(bst1)$evaluation_log$train_error))
|
||||
expect_equal(attributes(bst0)$evaluation_log$train_error,
|
||||
attributes(bst1)$evaluation_log$train_error)
|
||||
|
||||
# same eta but re-set via a function in the callback
|
||||
set.seed(111)
|
||||
my_par <- list(eta = function(itr, itr_end) 0.9)
|
||||
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
||||
callbacks = list(cb.reset.parameters(my_par)))
|
||||
expect_false(is.null(bst2$evaluation_log$train_error))
|
||||
expect_equal(bst0$evaluation_log$train_error,
|
||||
bst2$evaluation_log$train_error)
|
||||
expect_false(is.null(attributes(bst2)$evaluation_log$train_error))
|
||||
expect_equal(attributes(bst0)$evaluation_log$train_error,
|
||||
attributes(bst2)$evaluation_log$train_error)
|
||||
|
||||
# different eta re-set as a vector parameter in the callback
|
||||
set.seed(111)
|
||||
my_par <- list(eta = c(0.6, 0.5))
|
||||
bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
||||
callbacks = list(cb.reset.parameters(my_par)))
|
||||
expect_false(is.null(bst3$evaluation_log$train_error))
|
||||
expect_false(all(bst0$evaluation_log$train_error == bst3$evaluation_log$train_error))
|
||||
expect_false(is.null(attributes(bst3)$evaluation_log$train_error))
|
||||
expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error))
|
||||
|
||||
# resetting multiple parameters at the same time runs with no error
|
||||
my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8))
|
||||
@@ -166,8 +166,8 @@ test_that("cb.reset.parameters works as expected", {
|
||||
my_par <- list(eta = c(0., 0.))
|
||||
bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
||||
callbacks = list(cb.reset.parameters(my_par)))
|
||||
expect_false(is.null(bstX$evaluation_log$train_error))
|
||||
er <- unique(bstX$evaluation_log$train_error)
|
||||
expect_false(is.null(attributes(bstX)$evaluation_log$train_error))
|
||||
er <- unique(attributes(bstX)$evaluation_log$train_error)
|
||||
expect_length(er, 1)
|
||||
expect_gt(er, 0.4)
|
||||
})
|
||||
@@ -183,14 +183,14 @@ test_that("cb.save.model works as expected", {
|
||||
expect_true(file.exists(files[2]))
|
||||
b1 <- xgb.load(files[1])
|
||||
xgb.parameters(b1) <- list(nthread = 2)
|
||||
expect_equal(xgb.ntree(b1), 1)
|
||||
expect_equal(xgb.get.num.boosted.rounds(b1), 1)
|
||||
b2 <- xgb.load(files[2])
|
||||
xgb.parameters(b2) <- list(nthread = 2)
|
||||
expect_equal(xgb.ntree(b2), 2)
|
||||
expect_equal(xgb.get.num.boosted.rounds(b2), 2)
|
||||
|
||||
xgb.config(b2) <- xgb.config(bst)
|
||||
expect_equal(xgb.config(bst), xgb.config(b2))
|
||||
expect_equal(bst$raw, b2$raw)
|
||||
expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))
|
||||
|
||||
# save_period = 0 saves the last iteration's model
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
|
||||
@@ -198,7 +198,7 @@ test_that("cb.save.model works as expected", {
|
||||
expect_true(file.exists(files[3]))
|
||||
b2 <- xgb.load(files[3])
|
||||
xgb.config(b2) <- xgb.config(bst)
|
||||
expect_equal(bst$raw, b2$raw)
|
||||
expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))
|
||||
|
||||
for (f in files) if (file.exists(f)) file.remove(f)
|
||||
})
|
||||
@@ -209,14 +209,14 @@ test_that("early stopping xgb.train works", {
|
||||
bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
|
||||
early_stopping_rounds = 3, maximize = FALSE)
|
||||
, "Stopping. Best iteration")
|
||||
expect_false(is.null(bst$best_iteration))
|
||||
expect_lt(bst$best_iteration, 19)
|
||||
expect_equal(bst$best_iteration, bst$best_ntreelimit)
|
||||
expect_false(is.null(xgb.attr(bst, "best_iteration")))
|
||||
expect_lt(xgb.attr(bst, "best_iteration"), 19)
|
||||
expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
|
||||
|
||||
pred <- predict(bst, dtest)
|
||||
expect_equal(length(pred), 1611)
|
||||
err_pred <- err(ltest, pred)
|
||||
err_log <- bst$evaluation_log[bst$best_iteration, test_error]
|
||||
err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_error]
|
||||
expect_equal(err_log, err_pred, tolerance = 5e-6)
|
||||
|
||||
set.seed(11)
|
||||
@@ -224,15 +224,15 @@ test_that("early stopping xgb.train works", {
|
||||
bst0 <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
|
||||
early_stopping_rounds = 3, maximize = FALSE, verbose = 0)
|
||||
)
|
||||
expect_equal(bst$evaluation_log, bst0$evaluation_log)
|
||||
expect_equal(attributes(bst)$evaluation_log, attributes(bst0)$evaluation_log)
|
||||
|
||||
fname <- file.path(tempdir(), "model.bin")
|
||||
xgb.save(bst, fname)
|
||||
loaded <- xgb.load(fname)
|
||||
|
||||
expect_false(is.null(loaded$best_iteration))
|
||||
expect_equal(loaded$best_iteration, bst$best_ntreelimit)
|
||||
expect_equal(loaded$best_ntreelimit, bst$best_ntreelimit)
|
||||
expect_false(is.null(xgb.attr(loaded, "best_iteration")))
|
||||
expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
|
||||
expect_equal(xgb.attr(loaded, "best_ntreelimit"), xgb.attr(bst, "best_ntreelimit"))
|
||||
})
|
||||
|
||||
test_that("early stopping using a specific metric works", {
|
||||
@@ -243,14 +243,14 @@ test_that("early stopping using a specific metric works", {
|
||||
callbacks = list(cb.early.stop(stopping_rounds = 3, maximize = FALSE,
|
||||
metric_name = 'test_logloss')))
|
||||
, "Stopping. Best iteration")
|
||||
expect_false(is.null(bst$best_iteration))
|
||||
expect_lt(bst$best_iteration, 19)
|
||||
expect_equal(bst$best_iteration, bst$best_ntreelimit)
|
||||
expect_false(is.null(xgb.attr(bst, "best_iteration")))
|
||||
expect_lt(xgb.attr(bst, "best_iteration"), 19)
|
||||
expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
|
||||
|
||||
pred <- predict(bst, dtest, ntreelimit = bst$best_ntreelimit)
|
||||
pred <- predict(bst, dtest, ntreelimit = xgb.attr(bst, "best_ntreelimit"))
|
||||
expect_equal(length(pred), 1611)
|
||||
logloss_pred <- sum(-ltest * log(pred) - (1 - ltest) * log(1 - pred)) / length(ltest)
|
||||
logloss_log <- bst$evaluation_log[bst$best_iteration, test_logloss]
|
||||
logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_logloss]
|
||||
expect_equal(logloss_log, logloss_pred, tolerance = 1e-5)
|
||||
})
|
||||
|
||||
|
||||
@@ -35,9 +35,9 @@ num_round <- 2
|
||||
test_that("custom objective works", {
|
||||
bst <- xgb.train(param, dtrain, num_round, watchlist)
|
||||
expect_equal(class(bst), "xgb.Booster")
|
||||
expect_false(is.null(bst$evaluation_log))
|
||||
expect_false(is.null(bst$evaluation_log$eval_error))
|
||||
expect_lt(bst$evaluation_log[num_round, eval_error], 0.03)
|
||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||
expect_false(is.null(attributes(bst)$evaluation_log$eval_error))
|
||||
expect_lt(attributes(bst)$evaluation_log[num_round, eval_error], 0.03)
|
||||
})
|
||||
|
||||
test_that("custom objective in CV works", {
|
||||
@@ -50,7 +50,7 @@ test_that("custom objective in CV works", {
|
||||
test_that("custom objective with early stop works", {
|
||||
bst <- xgb.train(param, dtrain, 10, watchlist)
|
||||
expect_equal(class(bst), "xgb.Booster")
|
||||
train_log <- bst$evaluation_log$train_error
|
||||
train_log <- attributes(bst)$evaluation_log$train_error
|
||||
expect_true(all(diff(train_log) <= 0))
|
||||
})
|
||||
|
||||
|
||||
@@ -24,28 +24,28 @@ test_that("gblinear works", {
|
||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
|
||||
ypred <- predict(bst, dtest)
|
||||
expect_equal(length(getinfo(dtest, 'label')), 1611)
|
||||
expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)
|
||||
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
||||
|
||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic',
|
||||
callbacks = list(cb.gblinear.history()))
|
||||
expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)
|
||||
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
||||
h <- xgb.gblinear.history(bst)
|
||||
expect_equal(dim(h), c(n, ncol(dtrain) + 1))
|
||||
expect_is(h, "matrix")
|
||||
|
||||
param$updater <- 'coord_descent'
|
||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
|
||||
expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)
|
||||
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
||||
|
||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
|
||||
expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)
|
||||
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
||||
|
||||
bst <- xgb.train(param, dtrain, 2, watchlist, verbose = VERB, feature_selector = 'greedy')
|
||||
expect_lt(bst$evaluation_log$eval_error[2], ERR_UL)
|
||||
expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL)
|
||||
|
||||
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'thrifty',
|
||||
top_k = 50, callbacks = list(cb.gblinear.history(sparse = TRUE)))
|
||||
expect_lt(bst$evaluation_log$eval_error[n], ERR_UL)
|
||||
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
|
||||
h <- xgb.gblinear.history(bst)
|
||||
expect_equal(dim(h), c(n, ncol(dtrain) + 1))
|
||||
expect_s4_class(h, "dgCMatrix")
|
||||
@@ -72,10 +72,10 @@ test_that("gblinear early stopping works", {
|
||||
booster <- xgb.train(
|
||||
param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
|
||||
)
|
||||
expect_equal(booster$best_iteration, 5)
|
||||
expect_equal(xgb.attr(booster, "best_iteration"), 5)
|
||||
predt_es <- predict(booster, dtrain)
|
||||
|
||||
n <- booster$best_iteration + es_round
|
||||
n <- xgb.attr(booster, "best_iteration") + es_round
|
||||
booster <- xgb.train(
|
||||
param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
|
||||
)
|
||||
|
||||
@@ -49,6 +49,9 @@ mbst.GLM <- xgb.train(data = xgb.DMatrix(as.matrix(iris[, -5]), label = mlabel),
|
||||
booster = "gblinear", eta = 0.1, nthread = 1, nrounds = nrounds,
|
||||
objective = "multi:softprob", num_class = nclass, base_score = 0)
|
||||
|
||||
# without feature names
|
||||
bst.Tree.unnamed <- xgb.copy.Booster(bst.Tree)
|
||||
setinfo(bst.Tree.unnamed, "feature_name", NULL)
|
||||
|
||||
test_that("xgb.dump works", {
|
||||
.skip_if_vcd_not_available()
|
||||
@@ -204,7 +207,7 @@ test_that("xgb-attribute functionality", {
|
||||
list.ch <- list.val[order(names(list.val))]
|
||||
list.ch <- lapply(list.ch, as.character)
|
||||
# note: iter is 0-index in xgb attributes
|
||||
list.default <- list(niter = as.character(nrounds - 1))
|
||||
list.default <- list()
|
||||
list.ch <- c(list.ch, list.default)
|
||||
# proper input:
|
||||
expect_error(xgb.attr(bst.Tree, NULL))
|
||||
@@ -212,24 +215,25 @@ test_that("xgb-attribute functionality", {
|
||||
# set & get:
|
||||
expect_null(xgb.attr(bst.Tree, "asdf"))
|
||||
expect_equal(xgb.attributes(bst.Tree), list.default)
|
||||
xgb.attr(bst.Tree, "my_attr") <- val
|
||||
expect_equal(xgb.attr(bst.Tree, "my_attr"), val)
|
||||
xgb.attributes(bst.Tree) <- list.val
|
||||
expect_equal(xgb.attributes(bst.Tree), list.ch)
|
||||
bst.Tree.copy <- xgb.copy.Booster(bst.Tree)
|
||||
xgb.attr(bst.Tree.copy, "my_attr") <- val
|
||||
expect_equal(xgb.attr(bst.Tree.copy, "my_attr"), val)
|
||||
xgb.attributes(bst.Tree.copy) <- list.val
|
||||
expect_equal(xgb.attributes(bst.Tree.copy), list.ch)
|
||||
# serializing:
|
||||
fname <- file.path(tempdir(), "xgb.model")
|
||||
xgb.save(bst.Tree, fname)
|
||||
fname <- file.path(tempdir(), "xgb.ubj")
|
||||
xgb.save(bst.Tree.copy, fname)
|
||||
bst <- xgb.load(fname)
|
||||
expect_equal(xgb.attr(bst, "my_attr"), val)
|
||||
expect_equal(xgb.attributes(bst), list.ch)
|
||||
# deletion:
|
||||
xgb.attr(bst, "my_attr") <- NULL
|
||||
expect_null(xgb.attr(bst, "my_attr"))
|
||||
expect_equal(xgb.attributes(bst), list.ch[c("a", "b", "niter")])
|
||||
expect_equal(xgb.attributes(bst), list.ch[c("a", "b")])
|
||||
xgb.attributes(bst) <- list(a = NULL, b = NULL)
|
||||
expect_equal(xgb.attributes(bst), list.default)
|
||||
xgb.attributes(bst) <- list(niter = NULL)
|
||||
expect_null(xgb.attributes(bst))
|
||||
expect_equal(xgb.attributes(bst), list())
|
||||
})
|
||||
|
||||
if (grepl('Windows', Sys.info()[['sysname']], fixed = TRUE) ||
|
||||
@@ -262,21 +266,17 @@ test_that("xgb.Booster serializing as R object works", {
|
||||
dtrain <- xgb.DMatrix(sparse_matrix, label = label, nthread = 2)
|
||||
expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
|
||||
expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
|
||||
|
||||
fname_bin <- file.path(tempdir(), "xgb.model")
|
||||
xgb.save(bst, fname_bin)
|
||||
bst <- readRDS(fname_rds)
|
||||
nil_ptr <- new("externalptr")
|
||||
class(nil_ptr) <- "xgb.Booster.handle"
|
||||
expect_true(identical(bst$handle, nil_ptr))
|
||||
bst <- xgb.Booster.complete(bst)
|
||||
expect_true(!identical(bst$handle, nil_ptr))
|
||||
expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
|
||||
})
|
||||
|
||||
test_that("xgb.model.dt.tree works with and without feature names", {
|
||||
.skip_if_vcd_not_available()
|
||||
names.dt.trees <- c("Tree", "Node", "ID", "Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
|
||||
dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree)
|
||||
dt.tree <- xgb.model.dt.tree(model = bst.Tree)
|
||||
expect_equal(names.dt.trees, names(dt.tree))
|
||||
if (!flag_32bit)
|
||||
expect_equal(dim(dt.tree), c(188, 10))
|
||||
@@ -286,9 +286,7 @@ test_that("xgb.model.dt.tree works with and without feature names", {
|
||||
expect_equal(dt.tree, dt.tree.0)
|
||||
|
||||
# when model contains no feature names:
|
||||
bst.Tree.x <- bst.Tree
|
||||
bst.Tree.x$feature_names <- NULL
|
||||
dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.x)
|
||||
dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.unnamed)
|
||||
expect_output(str(dt.tree.x), 'Feature.*\\"3\\"')
|
||||
expect_equal(dt.tree[, -4, with = FALSE], dt.tree.x[, -4, with = FALSE])
|
||||
|
||||
@@ -316,9 +314,7 @@ test_that("xgb.importance works with and without feature names", {
|
||||
expect_equal(importance.Tree, importance.Tree.0, tolerance = float_tolerance)
|
||||
|
||||
# when model contains no feature names:
|
||||
bst.Tree.x <- bst.Tree
|
||||
bst.Tree.x$feature_names <- NULL
|
||||
importance.Tree.x <- xgb.importance(model = bst.Tree)
|
||||
importance.Tree.x <- xgb.importance(model = bst.Tree.unnamed)
|
||||
expect_equal(importance.Tree[, -1, with = FALSE], importance.Tree.x[, -1, with = FALSE],
|
||||
tolerance = float_tolerance)
|
||||
|
||||
@@ -334,7 +330,7 @@ test_that("xgb.importance works with and without feature names", {
|
||||
importance <- xgb.importance(feature_names = feature.names, model = bst.Tree, trees = trees)
|
||||
|
||||
importance_from_dump <- function() {
|
||||
model_text_dump <- xgb.dump(model = bst.Tree, with_stats = TRUE, trees = trees)
|
||||
model_text_dump <- xgb.dump(model = bst.Tree.unnamed, with_stats = TRUE, trees = trees)
|
||||
imp <- xgb.model.dt.tree(
|
||||
feature_names = feature.names,
|
||||
text = model_text_dump,
|
||||
@@ -414,13 +410,13 @@ test_that("xgb.plot.importance de-duplicates features", {
|
||||
|
||||
test_that("xgb.plot.tree works with and without feature names", {
|
||||
.skip_if_vcd_not_available()
|
||||
expect_silent(xgb.plot.tree(feature_names = feature.names, model = bst.Tree))
|
||||
expect_silent(xgb.plot.tree(feature_names = feature.names, model = bst.Tree.unnamed))
|
||||
expect_silent(xgb.plot.tree(model = bst.Tree))
|
||||
})
|
||||
|
||||
test_that("xgb.plot.multi.trees works with and without feature names", {
|
||||
.skip_if_vcd_not_available()
|
||||
xgb.plot.multi.trees(model = bst.Tree, feature_names = feature.names, features_keep = 3)
|
||||
xgb.plot.multi.trees(model = bst.Tree.unnamed, feature_names = feature.names, features_keep = 3)
|
||||
xgb.plot.multi.trees(model = bst.Tree, features_keep = 3)
|
||||
})
|
||||
|
||||
|
||||
@@ -17,8 +17,8 @@ test_that("load/save raw works", {
|
||||
ubj_bytes <- xgb.save.raw(booster, raw_format = "ubj")
|
||||
old_bytes <- xgb.save.raw(booster, raw_format = "deprecated")
|
||||
|
||||
from_json <- xgb.load.raw(json_bytes, as_booster = TRUE)
|
||||
from_ubj <- xgb.load.raw(ubj_bytes, as_booster = TRUE)
|
||||
from_json <- xgb.load.raw(json_bytes)
|
||||
from_ubj <- xgb.load.raw(ubj_bytes)
|
||||
|
||||
json2old <- xgb.save.raw(from_json, raw_format = "deprecated")
|
||||
ubj2old <- xgb.save.raw(from_ubj, raw_format = "deprecated")
|
||||
@@ -26,3 +26,46 @@ test_that("load/save raw works", {
|
||||
expect_equal(json2old, ubj2old)
|
||||
expect_equal(json2old, old_bytes)
|
||||
})
|
||||
|
||||
test_that("saveRDS preserves C and R attributes", {
|
||||
data(mtcars)
|
||||
y <- mtcars$mpg
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
model <- xgb.train(
|
||||
data = dm,
|
||||
params = list(nthread = 1, max_depth = 2),
|
||||
nrounds = 5
|
||||
)
|
||||
attributes(model)$my_attr <- "qwerty"
|
||||
xgb.attr(model, "c_attr") <- "asdf"
|
||||
|
||||
fname <- file.path(tempdir(), "xgb_model.Rds")
|
||||
saveRDS(model, fname)
|
||||
model_new <- readRDS(fname)
|
||||
|
||||
expect_equal(attributes(model_new)$my_attr, attributes(model)$my_attr)
|
||||
expect_equal(xgb.attr(model, "c_attr"), xgb.attr(model_new, "c_attr"))
|
||||
})
|
||||
|
||||
test_that("R serializers keep C config", {
|
||||
data(mtcars)
|
||||
y <- mtcars$mpg
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
model <- xgb.train(
|
||||
data = dm,
|
||||
params = list(
|
||||
tree_method = "approx",
|
||||
nthread = 1,
|
||||
max_depth = 2
|
||||
),
|
||||
nrounds = 3
|
||||
)
|
||||
model_new <- unserialize(serialize(model, NULL))
|
||||
expect_equal(
|
||||
xgb.config(model)$learner$gradient_booster$gbtree_train_param$tree_method,
|
||||
xgb.config(model_new)$learner$gradient_booster$gbtree_train_param$tree_method
|
||||
)
|
||||
expect_equal(variable.names(model), variable.names(model_new))
|
||||
})
|
||||
|
||||
@@ -23,11 +23,7 @@ get_num_tree <- function(booster) {
|
||||
}
|
||||
|
||||
run_booster_check <- function(booster, name) {
|
||||
# If given a handle, we need to call xgb.Booster.complete() prior to using xgb.config().
|
||||
if (inherits(booster, "xgb.Booster") && xgboost:::is.null.handle(booster$handle)) {
|
||||
booster <- xgb.Booster.complete(booster)
|
||||
}
|
||||
config <- jsonlite::fromJSON(xgb.config(booster))
|
||||
config <- xgb.config(booster)
|
||||
run_model_param_check(config)
|
||||
if (name == 'cls') {
|
||||
testthat::expect_equal(get_num_tree(booster),
|
||||
@@ -76,6 +72,10 @@ test_that("Models from previous versions of XGBoost can be loaded", {
|
||||
name <- m[3]
|
||||
is_rds <- endsWith(model_file, '.rds')
|
||||
is_json <- endsWith(model_file, '.json')
|
||||
# TODO: update this test for new RDS format
|
||||
if (is_rds) {
|
||||
return(NULL)
|
||||
}
|
||||
# Expect an R warning when a model is loaded from RDS and it was generated by version < 1.1.x
|
||||
if (is_rds && compareVersion(model_xgb_ver, '1.1.1.1') < 0) {
|
||||
booster <- readRDS(model_file)
|
||||
|
||||
@@ -19,12 +19,12 @@ bst <- xgb.train(data = dtrain,
|
||||
objective = "binary:logistic")
|
||||
|
||||
test_that("call is exposed to R", {
|
||||
expect_false(is.null(bst$call))
|
||||
expect_is(bst$call, "call")
|
||||
expect_false(is.null(attributes(bst)$call))
|
||||
expect_is(attributes(bst)$call, "call")
|
||||
})
|
||||
|
||||
test_that("params is exposed to R", {
|
||||
model_params <- bst$params
|
||||
model_params <- attributes(bst)$params
|
||||
expect_is(model_params, "list")
|
||||
expect_equal(model_params$eta, 1)
|
||||
expect_equal(model_params$max_depth, 2)
|
||||
|
||||
@@ -17,8 +17,8 @@ test_that('Test ranking with unweighted data', {
|
||||
eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
|
||||
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
|
||||
# Check if the metric is monotone increasing
|
||||
expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
|
||||
expect_true(all(diff(bst$evaluation_log$train_aucpr) >= 0))
|
||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
|
||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
|
||||
})
|
||||
|
||||
test_that('Test ranking with weighted data', {
|
||||
@@ -41,8 +41,8 @@ test_that('Test ranking with weighted data', {
|
||||
)
|
||||
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
|
||||
# Check if the metric is monotone increasing
|
||||
expect_true(all(diff(bst$evaluation_log$train_auc) >= 0))
|
||||
expect_true(all(diff(bst$evaluation_log$train_aucpr) >= 0))
|
||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
|
||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
|
||||
for (i in 1:10) {
|
||||
pred <- predict(bst, newdata = dtrain, ntreelimit = i)
|
||||
# is_sorted[i]: is i-th group correctly sorted by the ranking predictor?
|
||||
|
||||
@@ -40,7 +40,12 @@ test_that("updating the model works", {
|
||||
bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0)
|
||||
tr1r <- xgb.model.dt.tree(model = bst1r)
|
||||
# all should be the same when no subsampling
|
||||
expect_equal(bst1$evaluation_log, bst1r$evaluation_log)
|
||||
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1r)$evaluation_log)
|
||||
expect_equal(
|
||||
jsonlite::fromJSON(rawToChar(xgb.save.raw(bst1, raw_format = "json"))),
|
||||
jsonlite::fromJSON(rawToChar(xgb.save.raw(bst1r, raw_format = "json"))),
|
||||
tolerance = 1e-6
|
||||
)
|
||||
if (!win32_flag) {
|
||||
expect_equal(tr1, tr1r, tolerance = 0.00001, check.attributes = FALSE)
|
||||
}
|
||||
@@ -51,7 +56,7 @@ test_that("updating the model works", {
|
||||
bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0)
|
||||
tr2r <- xgb.model.dt.tree(model = bst2r)
|
||||
# should be the same evaluation but different gains and larger cover
|
||||
expect_equal(bst2$evaluation_log, bst2r$evaluation_log)
|
||||
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2r)$evaluation_log)
|
||||
if (!win32_flag) {
|
||||
expect_equal(tr2[Feature == 'Leaf']$Gain, tr2r[Feature == 'Leaf']$Gain)
|
||||
}
|
||||
@@ -59,11 +64,25 @@ test_that("updating the model works", {
|
||||
expect_gt(sum(tr2r$Cover) / sum(tr2$Cover), 1.5)
|
||||
|
||||
# process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
|
||||
set.seed(123)
|
||||
p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE))
|
||||
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
|
||||
tr1u <- xgb.model.dt.tree(model = bst1u)
|
||||
# all should be the same when no subsampling
|
||||
expect_equal(bst1$evaluation_log, bst1u$evaluation_log)
|
||||
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
|
||||
expect_equal(
|
||||
jsonlite::fromJSON(rawToChar(xgb.save.raw(bst1, raw_format = "json"))),
|
||||
jsonlite::fromJSON(rawToChar(xgb.save.raw(bst1u, raw_format = "json"))),
|
||||
tolerance = 1e-6
|
||||
)
|
||||
expect_equal(tr1, tr1u, tolerance = 0.00001, check.attributes = FALSE)
|
||||
|
||||
# same thing but with a serialized model
|
||||
set.seed(123)
|
||||
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
|
||||
tr1u <- xgb.model.dt.tree(model = bst1u)
|
||||
# all should be the same when no subsampling
|
||||
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
|
||||
expect_equal(tr1, tr1u, tolerance = 0.00001, check.attributes = FALSE)
|
||||
|
||||
# process type 'update' for model with subsampling, refreshing only the tree stats from training data:
|
||||
@@ -71,12 +90,12 @@ test_that("updating the model works", {
|
||||
bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst2)
|
||||
tr2u <- xgb.model.dt.tree(model = bst2u)
|
||||
# should be the same evaluation but different gains and larger cover
|
||||
expect_equal(bst2$evaluation_log, bst2u$evaluation_log)
|
||||
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2u)$evaluation_log)
|
||||
expect_equal(tr2[Feature == 'Leaf']$Gain, tr2u[Feature == 'Leaf']$Gain)
|
||||
expect_gt(sum(abs(tr2[Feature != 'Leaf']$Gain - tr2u[Feature != 'Leaf']$Gain)), 100)
|
||||
expect_gt(sum(tr2u$Cover) / sum(tr2$Cover), 1.5)
|
||||
# the results should be the same as for the model with an extra 'refresh' updater
|
||||
expect_equal(bst2r$evaluation_log, bst2u$evaluation_log)
|
||||
expect_equal(attributes(bst2r)$evaluation_log, attributes(bst2u)$evaluation_log)
|
||||
if (!win32_flag) {
|
||||
expect_equal(tr2r, tr2u, tolerance = 0.00001, check.attributes = FALSE)
|
||||
}
|
||||
@@ -86,7 +105,7 @@ test_that("updating the model works", {
|
||||
bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
|
||||
tr1ut <- xgb.model.dt.tree(model = bst1ut)
|
||||
# should be the same evaluations but different gains and smaller cover (test data is smaller)
|
||||
expect_equal(bst1$evaluation_log, bst1ut$evaluation_log)
|
||||
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1ut)$evaluation_log)
|
||||
expect_equal(tr1[Feature == 'Leaf']$Gain, tr1ut[Feature == 'Leaf']$Gain)
|
||||
expect_gt(sum(abs(tr1[Feature != 'Leaf']$Gain - tr1ut[Feature != 'Leaf']$Gain)), 100)
|
||||
expect_lt(sum(tr1ut$Cover) / sum(tr1$Cover), 0.5)
|
||||
@@ -106,11 +125,12 @@ test_that("updating works for multiclass & multitree", {
|
||||
|
||||
# run update process for an original model with subsampling
|
||||
p0u <- modifyList(p0, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
|
||||
bst0u <- xgb.train(p0u, dtr, nrounds = bst0$niter, watchlist, xgb_model = bst0, verbose = 0)
|
||||
bst0u <- xgb.train(p0u, dtr, nrounds = xgb.get.num.boosted.rounds(bst0),
|
||||
watchlist, xgb_model = bst0, verbose = 0)
|
||||
tr0u <- xgb.model.dt.tree(model = bst0u)
|
||||
|
||||
# should be the same evaluation but different gains and larger cover
|
||||
expect_equal(bst0$evaluation_log, bst0u$evaluation_log)
|
||||
expect_equal(attributes(bst0)$evaluation_log, attributes(bst0u)$evaluation_log)
|
||||
expect_equal(tr0[Feature == 'Leaf']$Gain, tr0u[Feature == 'Leaf']$Gain)
|
||||
expect_gt(sum(abs(tr0[Feature != 'Leaf']$Gain - tr0u[Feature != 'Leaf']$Gain)), 100)
|
||||
expect_gt(sum(tr0u$Cover) / sum(tr0$Cover), 1.5)
|
||||
|
||||
Reference in New Issue
Block a user