[R] Don't write files to user's directory (#9966)

This commit is contained in:
david-cortes 2024-01-08 20:43:48 +01:00 committed by GitHub
parent 7ff6d44efa
commit bed0349954
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 108 additions and 100 deletions

View File

@ -388,13 +388,14 @@ NULL
#' objective = "binary:logistic")
#'
#' # Save as a stand-alone file; load it with xgb.load()
#' xgb.save(bst, 'xgb.model')
#' bst2 <- xgb.load('xgb.model')
#' fname <- file.path(tempdir(), "xgb_model.ubj")
#' xgb.save(bst, fname)
#' bst2 <- xgb.load(fname)
#'
#' # Save as a stand-alone file (JSON); load it with xgb.load()
#' xgb.save(bst, 'xgb.model.json')
#' bst2 <- xgb.load('xgb.model.json')
#' if (file.exists('xgb.model.json')) file.remove('xgb.model.json')
#' fname <- file.path(tempdir(), "xgb_model.json")
#' xgb.save(bst, fname)
#' bst2 <- xgb.load(fname)
#'
#' # Save as a raw byte vector; load it with xgb.load.raw()
#' xgb_bytes <- xgb.save.raw(bst)
@ -405,12 +406,12 @@ NULL
#' # Persist the R object. Here, saveRDS() is okay, since it doesn't persist
#' # xgb.Booster directly. What's being persisted is the future-proof byte representation
#' # as given by xgb.save.raw().
#' saveRDS(obj, 'my_object.rds')
#' fname <- file.path(tempdir(), "my_object.Rds")
#' saveRDS(obj, fname)
#' # Read back the R object
#' obj2 <- readRDS('my_object.rds')
#' obj2 <- readRDS(fname)
#' # Re-construct xgb.Booster object from the bytes
#' bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
#' if (file.exists('my_object.rds')) file.remove('my_object.rds')
#'
#' @name a-compatibility-note-for-saveRDS-save
NULL

View File

@ -118,12 +118,12 @@ xgb.get.handle <- function(object) {
#' objective = "binary:logistic"
#' )
#'
#' saveRDS(bst, "xgb.model.rds")
#' fname <- file.path(tempdir(), "xgb_model.Rds")
#' saveRDS(bst, fname)
#'
#' # Warning: The resulting RDS file is only compatible with the current XGBoost version.
#' # Refer to the section titled "a-compatibility-note-for-saveRDS-save".
#' bst1 <- readRDS("xgb.model.rds")
#' if (file.exists("xgb.model.rds")) file.remove("xgb.model.rds")
#' bst1 <- readRDS(fname)
#' # the handle is invalid:
#' print(bst1$handle)
#'
@ -580,9 +580,9 @@ predict.xgb.Booster.handle <- function(object, ...) {
#' print(xgb.attr(bst, "my_attribute"))
#' xgb.attributes(bst) <- list(a = 123, b = "abc")
#'
#' xgb.save(bst, "xgb.model")
#' bst1 <- xgb.load("xgb.model")
#' if (file.exists("xgb.model")) file.remove("xgb.model")
#' fname <- file.path(tempdir(), "xgb.ubj")
#' xgb.save(bst, fname)
#' bst1 <- xgb.load(fname)
#' print(xgb.attr(bst1, "my_attribute"))
#' print(xgb.attributes(bst1))
#'

View File

@ -56,9 +56,9 @@
#' dtrain <- with(
#' agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
#' fname <- file.path(tempdir(), "xgb.DMatrix.data")
#' xgb.DMatrix.save(dtrain, fname)
#' dtrain <- xgb.DMatrix(fname)
#' @export
xgb.DMatrix <- function(
data,

View File

@ -8,9 +8,9 @@
#' @examples
#' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
#' fname <- file.path(tempdir(), "xgb.DMatrix.data")
#' xgb.DMatrix.save(dtrain, fname)
#' dtrain <- xgb.DMatrix(fname)
#' @export
xgb.DMatrix.save <- function(dmatrix, fname) {
if (typeof(fname) != "character")

View File

@ -38,9 +38,9 @@
#' objective = "binary:logistic"
#' )
#'
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' if (file.exists('xgb.model')) file.remove('xgb.model')
#' fname <- file.path(tempdir(), "xgb.ubj")
#' xgb.save(bst, fname)
#' bst <- xgb.load(fname)
#' @export
xgb.load <- function(modelfile) {
if (is.null(modelfile))

View File

@ -40,9 +40,9 @@
#' nrounds = 2,
#' objective = "binary:logistic"
#' )
#' xgb.save(bst, 'xgb.model')
#' bst <- xgb.load('xgb.model')
#' if (file.exists('xgb.model')) file.remove('xgb.model')
#' fname <- file.path(tempdir(), "xgb.ubj")
#' xgb.save(bst, fname)
#' bst <- xgb.load(fname)
#' @export
xgb.save <- function(model, fname) {
if (typeof(fname) != "character")

View File

@ -55,7 +55,7 @@ message(sprintf("Creating '%s' from '%s'", OUT_DEF_FILE, IN_DLL_FILE))
}
# use objdump to dump all the symbols
OBJDUMP_FILE <- "objdump-out.txt"
OBJDUMP_FILE <- file.path(tempdir(), "objdump-out.txt")
.pipe_shell_command_to_stdout(
command = "objdump"
, args = c("-p", IN_DLL_FILE)

View File

@ -38,13 +38,14 @@ bst <- xgb.train(data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$
objective = "binary:logistic")
# Save as a stand-alone file; load it with xgb.load()
xgb.save(bst, 'xgb.model')
bst2 <- xgb.load('xgb.model')
fname <- file.path(tempdir(), "xgb_model.ubj")
xgb.save(bst, fname)
bst2 <- xgb.load(fname)
# Save as a stand-alone file (JSON); load it with xgb.load()
xgb.save(bst, 'xgb.model.json')
bst2 <- xgb.load('xgb.model.json')
if (file.exists('xgb.model.json')) file.remove('xgb.model.json')
fname <- file.path(tempdir(), "xgb_model.json")
xgb.save(bst, fname)
bst2 <- xgb.load(fname)
# Save as a raw byte vector; load it with xgb.load.raw()
xgb_bytes <- xgb.save.raw(bst)
@ -55,11 +56,11 @@ obj <- list(xgb_model_bytes = xgb.save.raw(bst), description = "My first XGBoost
# Persist the R object. Here, saveRDS() is okay, since it doesn't persist
# xgb.Booster directly. What's being persisted is the future-proof byte representation
# as given by xgb.save.raw().
saveRDS(obj, 'my_object.rds')
fname <- file.path(tempdir(), "my_object.Rds")
saveRDS(obj, fname)
# Read back the R object
obj2 <- readRDS('my_object.rds')
obj2 <- readRDS(fname)
# Re-construct xgb.Booster object from the bytes
bst2 <- xgb.load.raw(obj2$xgb_model_bytes)
if (file.exists('my_object.rds')) file.remove('my_object.rds')
}

View File

@ -45,12 +45,12 @@ bst <- xgboost(
objective = "binary:logistic"
)
saveRDS(bst, "xgb.model.rds")
fname <- file.path(tempdir(), "xgb_model.Rds")
saveRDS(bst, fname)
# Warning: The resulting RDS file is only compatible with the current XGBoost version.
# Refer to the section titled "a-compatibility-note-for-saveRDS-save".
bst1 <- readRDS("xgb.model.rds")
if (file.exists("xgb.model.rds")) file.remove("xgb.model.rds")
bst1 <- readRDS(fname)
# the handle is invalid:
print(bst1$handle)

View File

@ -94,7 +94,7 @@ data.table::setDTthreads(nthread)
dtrain <- with(
agaricus.train, xgb.DMatrix(data, label = label, nthread = nthread)
)
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
fname <- file.path(tempdir(), "xgb.DMatrix.data")
xgb.DMatrix.save(dtrain, fname)
dtrain <- xgb.DMatrix(fname)
}

View File

@ -17,7 +17,7 @@ Save xgb.DMatrix object to binary file
\examples{
data(agaricus.train, package='xgboost')
dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
dtrain <- xgb.DMatrix('xgb.DMatrix.data')
if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data')
fname <- file.path(tempdir(), "xgb.DMatrix.data")
xgb.DMatrix.save(dtrain, fname)
dtrain <- xgb.DMatrix(fname)
}

View File

@ -79,9 +79,9 @@ xgb.attr(bst, "my_attribute") <- "my attribute value"
print(xgb.attr(bst, "my_attribute"))
xgb.attributes(bst) <- list(a = 123, b = "abc")
xgb.save(bst, "xgb.model")
bst1 <- xgb.load("xgb.model")
if (file.exists("xgb.model")) file.remove("xgb.model")
fname <- file.path(tempdir(), "xgb.ubj")
xgb.save(bst, fname)
bst1 <- xgb.load(fname)
print(xgb.attr(bst1, "my_attribute"))
print(xgb.attributes(bst1))

View File

@ -43,9 +43,9 @@ bst <- xgb.train(
objective = "binary:logistic"
)
xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
if (file.exists('xgb.model')) file.remove('xgb.model')
fname <- file.path(tempdir(), "xgb.ubj")
xgb.save(bst, fname)
bst <- xgb.load(fname)
}
\seealso{
\code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.

View File

@ -46,9 +46,9 @@ bst <- xgb.train(
nrounds = 2,
objective = "binary:logistic"
)
xgb.save(bst, 'xgb.model')
bst <- xgb.load('xgb.model')
if (file.exists('xgb.model')) file.remove('xgb.model')
fname <- file.path(tempdir(), "xgb.ubj")
xgb.save(bst, fname)
bst <- xgb.load(fname)
}
\seealso{
\code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.

View File

@ -330,17 +330,17 @@ test_that("training continuation works", {
}
expect_equal(dim(bst2$evaluation_log), c(2, 2))
# test continuing from a model in file
xgb.save(bst1, "xgboost.json")
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = "xgboost.json")
fname <- file.path(tempdir(), "xgboost.json")
xgb.save(bst1, fname)
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = fname)
if (!windows_flag && !solaris_flag) {
expect_equal(bst$raw, bst2$raw)
}
expect_equal(dim(bst2$evaluation_log), c(2, 2))
file.remove("xgboost.json")
})
test_that("model serialization works", {
out_path <- "model_serialization"
out_path <- file.path(tempdir(), "model_serialization")
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
watchlist <- list(train = dtrain)
param <- list(objective = "binary:logistic", nthread = n_threads)

View File

@ -174,16 +174,17 @@ test_that("cb.reset.parameters works as expected", {
test_that("cb.save.model works as expected", {
files <- c('xgboost_01.json', 'xgboost_02.json', 'xgboost.json')
files <- unname(sapply(files, function(f) file.path(tempdir(), f)))
for (f in files) if (file.exists(f)) file.remove(f)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
save_period = 1, save_name = "xgboost_%02d.json")
expect_true(file.exists('xgboost_01.json'))
expect_true(file.exists('xgboost_02.json'))
b1 <- xgb.load('xgboost_01.json')
save_period = 1, save_name = file.path(tempdir(), "xgboost_%02d.json"))
expect_true(file.exists(files[1]))
expect_true(file.exists(files[2]))
b1 <- xgb.load(files[1])
xgb.parameters(b1) <- list(nthread = 2)
expect_equal(xgb.ntree(b1), 1)
b2 <- xgb.load('xgboost_02.json')
b2 <- xgb.load(files[2])
xgb.parameters(b2) <- list(nthread = 2)
expect_equal(xgb.ntree(b2), 2)
@ -193,9 +194,9 @@ test_that("cb.save.model works as expected", {
# save_period = 0 saves the last iteration's model
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
save_period = 0, save_name = 'xgboost.json')
expect_true(file.exists('xgboost.json'))
b2 <- xgb.load('xgboost.json')
save_period = 0, save_name = file.path(tempdir(), 'xgboost.json'))
expect_true(file.exists(files[3]))
b2 <- xgb.load(files[3])
xgb.config(b2) <- xgb.config(bst)
expect_equal(bst$raw, b2$raw)
@ -225,14 +226,13 @@ test_that("early stopping xgb.train works", {
)
expect_equal(bst$evaluation_log, bst0$evaluation_log)
xgb.save(bst, "model.bin")
loaded <- xgb.load("model.bin")
fname <- file.path(tempdir(), "model.bin")
xgb.save(bst, fname)
loaded <- xgb.load(fname)
expect_false(is.null(loaded$best_iteration))
expect_equal(loaded$best_iteration, bst$best_ntreelimit)
expect_equal(loaded$best_ntreelimit, bst$best_ntreelimit)
file.remove("model.bin")
})
test_that("early stopping using a specific metric works", {

View File

@ -67,20 +67,22 @@ test_that("xgb.DMatrix: NA", {
x[1, "x1"] <- NA
m <- xgb.DMatrix(x, nthread = n_threads)
xgb.DMatrix.save(m, "int.dmatrix")
fname_int <- file.path(tempdir(), "int.dmatrix")
xgb.DMatrix.save(m, fname_int)
x <- matrix(as.numeric(x), nrow = n_samples, ncol = 2)
colnames(x) <- c("x1", "x2")
m <- xgb.DMatrix(x, nthread = n_threads)
xgb.DMatrix.save(m, "float.dmatrix")
fname_float <- file.path(tempdir(), "float.dmatrix")
xgb.DMatrix.save(m, fname_float)
iconn <- file("int.dmatrix", "rb")
fconn <- file("float.dmatrix", "rb")
iconn <- file(fname_int, "rb")
fconn <- file(fname_float, "rb")
expect_equal(file.size("int.dmatrix"), file.size("float.dmatrix"))
expect_equal(file.size(fname_int), file.size(fname_float))
bytes <- file.size("int.dmatrix")
bytes <- file.size(fname_int)
idmatrix <- readBin(iconn, "raw", n = bytes)
fdmatrix <- readBin(fconn, "raw", n = bytes)
@ -90,8 +92,8 @@ test_that("xgb.DMatrix: NA", {
close(iconn)
close(fconn)
file.remove("int.dmatrix")
file.remove("float.dmatrix")
file.remove(fname_int)
file.remove(fname_float)
})
test_that("xgb.DMatrix: saving, loading", {
@ -274,17 +276,19 @@ test_that("xgb.DMatrix: Inf as missing", {
x_nan[2, 1] <- NA_real_
m_inf <- xgb.DMatrix(x_inf, nthread = n_threads, missing = Inf)
xgb.DMatrix.save(m_inf, "inf.dmatrix")
fname_inf <- file.path(tempdir(), "inf.dmatrix")
xgb.DMatrix.save(m_inf, fname_inf)
m_nan <- xgb.DMatrix(x_nan, nthread = n_threads, missing = NA_real_)
xgb.DMatrix.save(m_nan, "nan.dmatrix")
fname_nan <- file.path(tempdir(), "nan.dmatrix")
xgb.DMatrix.save(m_nan, fname_nan)
infconn <- file("inf.dmatrix", "rb")
nanconn <- file("nan.dmatrix", "rb")
infconn <- file(fname_inf, "rb")
nanconn <- file(fname_nan, "rb")
expect_equal(file.size("inf.dmatrix"), file.size("nan.dmatrix"))
expect_equal(file.size(fname_inf), file.size(fname_nan))
bytes <- file.size("inf.dmatrix")
bytes <- file.size(fname_inf)
infdmatrix <- readBin(infconn, "raw", n = bytes)
nandmatrix <- readBin(nanconn, "raw", n = bytes)
@ -294,8 +298,8 @@ test_that("xgb.DMatrix: Inf as missing", {
close(infconn)
close(nanconn)
file.remove("inf.dmatrix")
file.remove("nan.dmatrix")
file.remove(fname_inf)
file.remove(fname_nan)
})
test_that("xgb.DMatrix: error on three-dimensional array", {

View File

@ -217,9 +217,9 @@ test_that("xgb-attribute functionality", {
xgb.attributes(bst.Tree) <- list.val
expect_equal(xgb.attributes(bst.Tree), list.ch)
# serializing:
xgb.save(bst.Tree, 'xgb.model')
bst <- xgb.load('xgb.model')
if (file.exists('xgb.model')) file.remove('xgb.model')
fname <- file.path(tempdir(), "xgb.model")
xgb.save(bst.Tree, fname)
bst <- xgb.load(fname)
expect_equal(xgb.attr(bst, "my_attr"), val)
expect_equal(xgb.attributes(bst), list.ch)
# deletion:
@ -256,15 +256,15 @@ if (grepl('Windows', Sys.info()[['sysname']], fixed = TRUE) ||
test_that("xgb.Booster serializing as R object works", {
.skip_if_vcd_not_available()
saveRDS(bst.Tree, 'xgb.model.rds')
bst <- readRDS('xgb.model.rds')
fname_rds <- file.path(tempdir(), "xgb.model.rds")
saveRDS(bst.Tree, fname_rds)
bst <- readRDS(fname_rds)
dtrain <- xgb.DMatrix(sparse_matrix, label = label, nthread = 2)
expect_equal(predict(bst.Tree, dtrain), predict(bst, dtrain), tolerance = float_tolerance)
expect_equal(xgb.dump(bst.Tree), xgb.dump(bst))
xgb.save(bst, 'xgb.model')
if (file.exists('xgb.model')) file.remove('xgb.model')
bst <- readRDS('xgb.model.rds')
if (file.exists('xgb.model.rds')) file.remove('xgb.model.rds')
fname_bin <- file.path(tempdir(), "xgb.model")
xgb.save(bst, fname_bin)
bst <- readRDS(fname_rds)
nil_ptr <- new("externalptr")
class(nil_ptr) <- "xgb.Booster.handle"
expect_true(identical(bst$handle, nil_ptr))

View File

@ -400,9 +400,10 @@ In simple cases, it will happen because there is nothing better than a linear al
Like saving models, `xgb.DMatrix` object (which groups both dataset and outcome) can also be saved using `xgb.DMatrix.save` function.
```{r DMatrixSave, message=F, warning=F}
xgb.DMatrix.save(dtrain, "dtrain.buffer")
fname <- file.path(tempdir(), "dtrain.buffer")
xgb.DMatrix.save(dtrain, fname)
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
dtrain2 <- xgb.DMatrix(fname)
bst <- xgb.train(
data = dtrain2
, max_depth = 2
@ -415,7 +416,7 @@ bst <- xgb.train(
```
```{r DMatrixDel, include=FALSE}
file.remove("dtrain.buffer")
file.remove(fname)
```
#### Information extraction
@ -466,7 +467,8 @@ Hopefully for you, **XGBoost** implements such functions.
```{r saveModel, message=F, warning=F}
# save model to binary local file
xgb.save(bst, "xgboost.model")
fname <- file.path(tempdir(), "xgb_model.ubj")
xgb.save(bst, fname)
```
> `xgb.save` function should return `r TRUE` if everything goes well and crashes otherwise.
@ -475,7 +477,7 @@ An interesting test to see how identical our saved model is to the original one
```{r loadModel, message=F, warning=F}
# load binary model to R
bst2 <- xgb.load("xgboost.model")
bst2 <- xgb.load(fname)
xgb.parameters(bst2) <- list(nthread = 2)
pred2 <- predict(bst2, test$data)
@ -485,7 +487,7 @@ print(paste("sum(abs(pred2-pred))=", sum(abs(pred2 - pred))))
```{r clean, include=FALSE}
# delete the created model
file.remove("./xgboost.model")
file.remove(fname)
```
> result is `0`? We are good!