[R] On-demand serialization + standardization of attributes (#9924)
--------- Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
This commit is contained in:
@@ -228,7 +228,7 @@ cb.reset.parameters <- function(new_params) {
|
||||
})
|
||||
|
||||
if (!is.null(env$bst)) {
|
||||
xgb.parameters(env$bst$handle) <- pars
|
||||
xgb.parameters(env$bst) <- pars
|
||||
} else {
|
||||
for (fd in env$bst_folds)
|
||||
xgb.parameters(fd$bst) <- pars
|
||||
@@ -333,13 +333,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||
if (!is.null(env$bst)) {
|
||||
if (!inherits(env$bst, 'xgb.Booster'))
|
||||
stop("'bst' in the parent frame must be an 'xgb.Booster'")
|
||||
if (!is.null(best_score <- xgb.attr(env$bst$handle, 'best_score'))) {
|
||||
if (!is.null(best_score <- xgb.attr(env$bst, 'best_score'))) {
|
||||
best_score <<- as.numeric(best_score)
|
||||
best_iteration <<- as.numeric(xgb.attr(env$bst$handle, 'best_iteration')) + 1
|
||||
best_msg <<- as.numeric(xgb.attr(env$bst$handle, 'best_msg'))
|
||||
best_iteration <<- as.numeric(xgb.attr(env$bst, 'best_iteration')) + 1
|
||||
best_msg <<- as.numeric(xgb.attr(env$bst, 'best_msg'))
|
||||
} else {
|
||||
xgb.attributes(env$bst$handle) <- list(best_iteration = best_iteration - 1,
|
||||
best_score = best_score)
|
||||
xgb.attributes(env$bst) <- list(best_iteration = best_iteration - 1,
|
||||
best_score = best_score)
|
||||
}
|
||||
} else if (is.null(env$bst_folds) || is.null(env$basket)) {
|
||||
stop("Parent frame has neither 'bst' nor ('bst_folds' and 'basket')")
|
||||
@@ -348,7 +348,7 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||
|
||||
finalizer <- function(env) {
|
||||
if (!is.null(env$bst)) {
|
||||
attr_best_score <- as.numeric(xgb.attr(env$bst$handle, 'best_score'))
|
||||
attr_best_score <- as.numeric(xgb.attr(env$bst, 'best_score'))
|
||||
if (best_score != attr_best_score) {
|
||||
# If the difference is too big, throw an error
|
||||
if (abs(best_score - attr_best_score) >= 1e-14) {
|
||||
@@ -358,9 +358,9 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||
# If the difference is due to floating-point truncation, update best_score
|
||||
best_score <- attr_best_score
|
||||
}
|
||||
env$bst$best_iteration <- best_iteration
|
||||
env$bst$best_ntreelimit <- best_ntreelimit
|
||||
env$bst$best_score <- best_score
|
||||
xgb.attr(env$bst, "best_iteration") <- best_iteration
|
||||
xgb.attr(env$bst, "best_ntreelimit") <- best_ntreelimit
|
||||
xgb.attr(env$bst, "best_score") <- best_score
|
||||
} else {
|
||||
env$basket$best_iteration <- best_iteration
|
||||
env$basket$best_ntreelimit <- best_ntreelimit
|
||||
@@ -412,11 +412,15 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||
#' @param save_period save the model to disk after every
|
||||
#' \code{save_period} iterations; 0 means save the model at the end.
|
||||
#' @param save_name the name or path for the saved model file.
|
||||
#'
|
||||
#' Note that the format of the model being saved is determined by the file
|
||||
#' extension specified here (see \link{xgb.save} for details about how it works).
|
||||
#'
|
||||
#' It can contain a \code{\link[base]{sprintf}} formatting specifier
|
||||
#' to include the integer iteration number in the file name.
|
||||
#' E.g., with \code{save_name} = 'xgboost_%04d.model',
|
||||
#' the file saved at iteration 50 would be named "xgboost_0050.model".
|
||||
#'
|
||||
#' E.g., with \code{save_name} = 'xgboost_%04d.ubj',
|
||||
#' the file saved at iteration 50 would be named "xgboost_0050.ubj".
|
||||
#' @seealso \link{xgb.save}
|
||||
#' @details
|
||||
#' This callback function allows to save an xgb-model file, either periodically after each \code{save_period}'s or at the end.
|
||||
#'
|
||||
@@ -430,7 +434,7 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||
#' \code{\link{callbacks}}
|
||||
#'
|
||||
#' @export
|
||||
cb.save.model <- function(save_period = 0, save_name = "xgboost.model") {
|
||||
cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
|
||||
|
||||
if (save_period < 0)
|
||||
stop("'save_period' cannot be negative")
|
||||
@@ -440,8 +444,13 @@ cb.save.model <- function(save_period = 0, save_name = "xgboost.model") {
|
||||
stop("'save_model' callback requires the 'bst' booster object in its calling frame")
|
||||
|
||||
if ((save_period > 0 && (env$iteration - env$begin_iteration) %% save_period == 0) ||
|
||||
(save_period == 0 && env$iteration == env$end_iteration))
|
||||
xgb.save(env$bst, sprintf(save_name, env$iteration))
|
||||
(save_period == 0 && env$iteration == env$end_iteration)) {
|
||||
# Note: this throws a warning if the name doesn't have anything to format through 'sprintf'
|
||||
suppressWarnings({
|
||||
save_name <- sprintf(save_name, env$iteration)
|
||||
})
|
||||
xgb.save(env$bst, save_name)
|
||||
}
|
||||
}
|
||||
attr(callback, 'call') <- match.call()
|
||||
attr(callback, 'name') <- 'cb.save.model'
|
||||
@@ -512,8 +521,7 @@ cb.cv.predict <- function(save_models = FALSE) {
|
||||
env$basket$pred <- pred
|
||||
if (save_models) {
|
||||
env$basket$models <- lapply(env$bst_folds, function(fd) {
|
||||
xgb.attr(fd$bst, 'niter') <- env$end_iteration - 1
|
||||
xgb.Booster.complete(xgb.handleToBooster(handle = fd$bst, raw = NULL), saveraw = TRUE)
|
||||
return(fd$bst)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -665,7 +673,7 @@ cb.gblinear.history <- function(sparse = FALSE) {
|
||||
} else { # xgb.cv:
|
||||
cf <- vector("list", length(env$bst_folds))
|
||||
for (i in seq_along(env$bst_folds)) {
|
||||
dmp <- xgb.dump(xgb.handleToBooster(handle = env$bst_folds[[i]]$bst, raw = NULL))
|
||||
dmp <- xgb.dump(env$bst_folds[[i]]$bst)
|
||||
cf[[i]] <- as.numeric(grep('(booster|bias|weigh)', dmp, invert = TRUE, value = TRUE))
|
||||
if (sparse) cf[[i]] <- as(cf[[i]], "sparseVector")
|
||||
}
|
||||
@@ -685,14 +693,19 @@ cb.gblinear.history <- function(sparse = FALSE) {
|
||||
callback
|
||||
}
|
||||
|
||||
#' Extract gblinear coefficients history.
|
||||
#'
|
||||
#' A helper function to extract the matrix of linear coefficients' history
|
||||
#' @title Extract gblinear coefficients history.
|
||||
#' @description A helper function to extract the matrix of linear coefficients' history
|
||||
#' from a gblinear model created while using the \code{cb.gblinear.history()}
|
||||
#' callback.
|
||||
#' @details Note that this is an R-specific function that relies on R attributes that
|
||||
#' are not saved when using xgboost's own serialization functions like \link{xgb.load}
|
||||
#' or \link{xgb.load.raw}.
|
||||
#'
|
||||
#' In order for a serialized model to be accepted by tgis function, one must use R
|
||||
#' serializers such as \link{saveRDS}.
|
||||
#' @param model either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
|
||||
#' using the \code{cb.gblinear.history()} callback.
|
||||
#' using the \code{cb.gblinear.history()} callback, but \bold{not} a booster
|
||||
#' loaded from \link{xgb.load} or \link{xgb.load.raw}.
|
||||
#' @param class_index zero-based class index to extract the coefficients for only that
|
||||
#' specific class in a multinomial multiclass model. When it is NULL, all the
|
||||
#' coefficients are returned. Has no effect in non-multiclass models.
|
||||
@@ -713,20 +726,18 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
|
||||
stop("model must be an object of either xgb.Booster or xgb.cv.synchronous class")
|
||||
is_cv <- inherits(model, "xgb.cv.synchronous")
|
||||
|
||||
if (is.null(model[["callbacks"]]) || is.null(model$callbacks[["cb.gblinear.history"]]))
|
||||
if (is_cv) {
|
||||
callbacks <- model$callbacks
|
||||
} else {
|
||||
callbacks <- attributes(model)$callbacks
|
||||
}
|
||||
|
||||
if (is.null(callbacks) || is.null(callbacks$cb.gblinear.history))
|
||||
stop("model must be trained while using the cb.gblinear.history() callback")
|
||||
|
||||
if (!is_cv) {
|
||||
# extract num_class & num_feat from the internal model
|
||||
dmp <- xgb.dump(model)
|
||||
if (length(dmp) < 2 || dmp[2] != "bias:")
|
||||
stop("It does not appear to be a gblinear model")
|
||||
dmp <- dmp[-c(1, 2)]
|
||||
n <- which(dmp == 'weight:')
|
||||
if (length(n) != 1)
|
||||
stop("It does not appear to be a gblinear model")
|
||||
num_class <- n - 1
|
||||
num_feat <- (length(dmp) - 4) / num_class
|
||||
num_class <- xgb.num_class(model)
|
||||
num_feat <- xgb.num_feature(model)
|
||||
} else {
|
||||
# in case of CV, the object is expected to have this info
|
||||
if (model$params$booster != "gblinear")
|
||||
@@ -742,7 +753,7 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
|
||||
(class_index[1] < 0 || class_index[1] >= num_class))
|
||||
stop("class_index has to be within [0,", num_class - 1, "]")
|
||||
|
||||
coef_path <- environment(model$callbacks$cb.gblinear.history)[["coefs"]]
|
||||
coef_path <- environment(callbacks$cb.gblinear.history)[["coefs"]]
|
||||
if (!is.null(class_index) && num_class > 1) {
|
||||
coef_path <- if (is.list(coef_path)) {
|
||||
lapply(coef_path,
|
||||
|
||||
@@ -148,19 +148,17 @@ check.custom.eval <- function(env = parent.frame()) {
|
||||
|
||||
|
||||
# Update a booster handle for an iteration with dtrain data
|
||||
xgb.iter.update <- function(booster_handle, dtrain, iter, obj) {
|
||||
if (!identical(class(booster_handle), "xgb.Booster.handle")) {
|
||||
stop("booster_handle must be of xgb.Booster.handle class")
|
||||
}
|
||||
xgb.iter.update <- function(bst, dtrain, iter, obj) {
|
||||
if (!inherits(dtrain, "xgb.DMatrix")) {
|
||||
stop("dtrain must be of xgb.DMatrix class")
|
||||
}
|
||||
handle <- xgb.get.handle(bst)
|
||||
|
||||
if (is.null(obj)) {
|
||||
.Call(XGBoosterUpdateOneIter_R, booster_handle, as.integer(iter), dtrain)
|
||||
.Call(XGBoosterUpdateOneIter_R, handle, as.integer(iter), dtrain)
|
||||
} else {
|
||||
pred <- predict(
|
||||
booster_handle,
|
||||
bst,
|
||||
dtrain,
|
||||
outputmargin = TRUE,
|
||||
training = TRUE,
|
||||
@@ -185,7 +183,7 @@ xgb.iter.update <- function(booster_handle, dtrain, iter, obj) {
|
||||
}
|
||||
|
||||
.Call(
|
||||
XGBoosterTrainOneIter_R, booster_handle, dtrain, iter, grad, hess
|
||||
XGBoosterTrainOneIter_R, handle, dtrain, iter, grad, hess
|
||||
)
|
||||
}
|
||||
return(TRUE)
|
||||
@@ -195,23 +193,22 @@ xgb.iter.update <- function(booster_handle, dtrain, iter, obj) {
|
||||
# Evaluate one iteration.
|
||||
# Returns a named vector of evaluation metrics
|
||||
# with the names in a 'datasetname-metricname' format.
|
||||
xgb.iter.eval <- function(booster_handle, watchlist, iter, feval) {
|
||||
if (!identical(class(booster_handle), "xgb.Booster.handle"))
|
||||
stop("class of booster_handle must be xgb.Booster.handle")
|
||||
xgb.iter.eval <- function(bst, watchlist, iter, feval) {
|
||||
handle <- xgb.get.handle(bst)
|
||||
|
||||
if (length(watchlist) == 0)
|
||||
return(NULL)
|
||||
|
||||
evnames <- names(watchlist)
|
||||
if (is.null(feval)) {
|
||||
msg <- .Call(XGBoosterEvalOneIter_R, booster_handle, as.integer(iter), watchlist, as.list(evnames))
|
||||
msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), watchlist, as.list(evnames))
|
||||
mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
|
||||
res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
|
||||
} else {
|
||||
res <- sapply(seq_along(watchlist), function(j) {
|
||||
w <- watchlist[[j]]
|
||||
## predict using all trees
|
||||
preds <- predict(booster_handle, w, outputmargin = TRUE, iterationrange = c(1, 1))
|
||||
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = c(1, 1))
|
||||
eval_res <- feval(preds, w)
|
||||
out <- eval_res$value
|
||||
names(out) <- paste0(evnames[j], "-", eval_res$metric)
|
||||
@@ -352,16 +349,45 @@ xgb.createFolds <- function(y, k) {
|
||||
#' @name xgboost-deprecated
|
||||
NULL
|
||||
|
||||
#' Do not use \code{\link[base]{saveRDS}} or \code{\link[base]{save}} for long-term archival of
|
||||
#' models. Instead, use \code{\link{xgb.save}} or \code{\link{xgb.save.raw}}.
|
||||
#' @title Model Serialization and Compatibility
|
||||
#' @description
|
||||
#'
|
||||
#' It is a common practice to use the built-in \code{\link[base]{saveRDS}} function (or
|
||||
#' \code{\link[base]{save}}) to persist R objects to the disk. While it is possible to persist
|
||||
#' \code{xgb.Booster} objects using \code{\link[base]{saveRDS}}, it is not advisable to do so if
|
||||
#' the model is to be accessed in the future. If you train a model with the current version of
|
||||
#' XGBoost and persist it with \code{\link[base]{saveRDS}}, the model is not guaranteed to be
|
||||
#' accessible in later releases of XGBoost. To ensure that your model can be accessed in future
|
||||
#' releases of XGBoost, use \code{\link{xgb.save}} or \code{\link{xgb.save.raw}} instead.
|
||||
#' When it comes to serializing XGBoost models, it's possible to use R serializers such as
|
||||
#' \link{save} or \link{saveRDS} to serialize an XGBoost R model, but XGBoost also provides
|
||||
#' its own serializers with better compatibility guarantees, which allow loading
|
||||
#' said models in other language bindings of XGBoost.
|
||||
#'
|
||||
#' Note that an `xgb.Booster` object, outside of its core components, might also keep:\itemize{
|
||||
#' \item Additional model configuration (accessible through \link{xgb.config}),
|
||||
#' which includes model fitting parameters like `max_depth` and runtime parameters like `nthread`.
|
||||
#' These are not necessarily useful for prediction/importance/plotting.
|
||||
#' \item Additional R-specific attributes - e.g. results of callbacks, such as evaluation logs,
|
||||
#' which are kept as a `data.table` object, accessible through `attributes(model)$evaluation_log`
|
||||
#' if present.
|
||||
#' }
|
||||
#'
|
||||
#' The first one (configurations) does not have the same compatibility guarantees as
|
||||
#' the model itself, including attributes that are set and accessed through \link{xgb.attributes} - that is, such configuration
|
||||
#' might be lost after loading the booster in a different XGBoost version, regardless of the
|
||||
#' serializer that was used. These are saved when using \link{saveRDS}, but will be discarded
|
||||
#' if loaded into an incompatible XGBoost version. They are not saved when using XGBoost's
|
||||
#' serializers from its public interface including \link{xgb.save} and \link{xgb.save.raw}.
|
||||
#'
|
||||
#' The second ones (R attributes) are not part of the standard XGBoost model structure, and thus are
|
||||
#' not saved when using XGBoost's own serializers. These attributes are only used for informational
|
||||
#' purposes, such as keeping track of evaluation metrics as the model was fit, or saving the R
|
||||
#' call that produced the model, but are otherwise not used for prediction / importance / plotting / etc.
|
||||
#' These R attributes are only preserved when using R's serializers.
|
||||
#'
|
||||
#' Note that XGBoost models in R starting from version `2.1.0` and onwards, and XGBoost models
|
||||
#' before version `2.1.0`; have a very different R object structure and are incompatible with
|
||||
#' each other. Hence, models that were saved with R serializers live `saveRDS` or `save` before
|
||||
#' version `2.1.0` will not work with latter `xgboost` versions and vice versa. Be aware that
|
||||
#' the structure of R model objects could in theory change again in the future, so XGBoost's serializers
|
||||
#' should be preferred for long-term storage.
|
||||
#'
|
||||
#' Furthermore, note that using the package `qs` for serialization will require version 0.26 or
|
||||
#' higher of said package, and will have the same compatibility restrictions as R serializers.
|
||||
#'
|
||||
#' @details
|
||||
#' Use \code{\link{xgb.save}} to save the XGBoost model as a stand-alone file. You may opt into
|
||||
@@ -374,9 +400,10 @@ NULL
|
||||
#' The \code{\link{xgb.save.raw}} function is useful if you'd like to persist the XGBoost model
|
||||
#' as part of another R object.
|
||||
#'
|
||||
#' Note: Do not use \code{\link{xgb.serialize}} to store models long-term. It persists not only the
|
||||
#' model but also internal configurations and parameters, and its format is not stable across
|
||||
#' multiple XGBoost versions. Use \code{\link{xgb.serialize}} only for checkpointing.
|
||||
#' Use \link{saveRDS} if you require the R-specific attributes that a booster might have, such
|
||||
#' as evaluation logs, but note that future compatibility of such objects is outside XGBoost's
|
||||
#' control as it relies on R's serialization format (see e.g. the details section in
|
||||
#' \link{serialize} and \link{save} from base R).
|
||||
#'
|
||||
#' For more details and explanation about model persistence and archival, consult the page
|
||||
#' \url{https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html}.
|
||||
|
||||
@@ -1,180 +1,85 @@
|
||||
# Construct an internal xgboost Booster and return a handle to it.
|
||||
# Construct an internal xgboost Booster and get its current number of rounds.
|
||||
# internal utility function
|
||||
xgb.Booster.handle <- function(params, cachelist, modelfile, handle) {
|
||||
# Note: the number of rounds in the C booster gets reset to zero when changing
|
||||
# key booster parameters like 'process_type=update', but in some cases, when
|
||||
# replacing previous iterations, it needs to make a check that the new number
|
||||
# of iterations doesn't exceed the previous ones, hence it keeps track of the
|
||||
# current number of iterations before resetting the parameters in order to
|
||||
# perform the check later on.
|
||||
xgb.Booster <- function(params, cachelist, modelfile) {
|
||||
if (typeof(cachelist) != "list" ||
|
||||
!all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) {
|
||||
stop("cachelist must be a list of xgb.DMatrix objects")
|
||||
}
|
||||
## Load existing model, dispatch for on disk model file and in memory buffer
|
||||
if (!is.null(modelfile)) {
|
||||
if (typeof(modelfile) == "character") {
|
||||
if (is.character(modelfile)) {
|
||||
## A filename
|
||||
handle <- .Call(XGBoosterCreate_R, cachelist)
|
||||
bst <- .Call(XGBoosterCreate_R, cachelist)
|
||||
modelfile <- path.expand(modelfile)
|
||||
.Call(XGBoosterLoadModel_R, handle, enc2utf8(modelfile[1]))
|
||||
class(handle) <- "xgb.Booster.handle"
|
||||
.Call(XGBoosterLoadModel_R, xgb.get.handle(bst), enc2utf8(modelfile[1]))
|
||||
niter <- xgb.get.num.boosted.rounds(bst)
|
||||
if (length(params) > 0) {
|
||||
xgb.parameters(handle) <- params
|
||||
xgb.parameters(bst) <- params
|
||||
}
|
||||
return(handle)
|
||||
} else if (typeof(modelfile) == "raw") {
|
||||
return(list(bst = bst, niter = niter))
|
||||
} else if (is.raw(modelfile)) {
|
||||
## A memory buffer
|
||||
bst <- xgb.unserialize(modelfile, handle)
|
||||
bst <- xgb.load.raw(modelfile)
|
||||
niter <- xgb.get.num.boosted.rounds(bst)
|
||||
xgb.parameters(bst) <- params
|
||||
return(bst)
|
||||
return(list(bst = bst, niter = niter))
|
||||
} else if (inherits(modelfile, "xgb.Booster")) {
|
||||
## A booster object
|
||||
bst <- xgb.Booster.complete(modelfile, saveraw = TRUE)
|
||||
bst <- xgb.unserialize(bst$raw)
|
||||
bst <- .Call(XGDuplicate_R, modelfile)
|
||||
niter <- xgb.get.num.boosted.rounds(bst)
|
||||
xgb.parameters(bst) <- params
|
||||
return(bst)
|
||||
return(list(bst = bst, niter = niter))
|
||||
} else {
|
||||
stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object")
|
||||
}
|
||||
}
|
||||
## Create new model
|
||||
handle <- .Call(XGBoosterCreate_R, cachelist)
|
||||
class(handle) <- "xgb.Booster.handle"
|
||||
bst <- .Call(XGBoosterCreate_R, cachelist)
|
||||
if (length(params) > 0) {
|
||||
xgb.parameters(handle) <- params
|
||||
xgb.parameters(bst) <- params
|
||||
}
|
||||
return(handle)
|
||||
return(list(bst = bst, niter = 0L))
|
||||
}
|
||||
|
||||
# Convert xgb.Booster.handle to xgb.Booster
|
||||
# internal utility function
|
||||
xgb.handleToBooster <- function(handle, raw) {
|
||||
bst <- list(handle = handle, raw = raw)
|
||||
class(bst) <- "xgb.Booster"
|
||||
return(bst)
|
||||
}
|
||||
|
||||
# Check whether xgb.Booster.handle is null
|
||||
# Check whether xgb.Booster handle is null
|
||||
# internal utility function
|
||||
is.null.handle <- function(handle) {
|
||||
if (is.null(handle)) return(TRUE)
|
||||
|
||||
if (!identical(class(handle), "xgb.Booster.handle"))
|
||||
stop("argument type must be xgb.Booster.handle")
|
||||
if (!inherits(handle, "externalptr"))
|
||||
stop("argument type must be 'externalptr'")
|
||||
|
||||
if (.Call(XGCheckNullPtr_R, handle))
|
||||
return(TRUE)
|
||||
|
||||
return(FALSE)
|
||||
return(.Call(XGCheckNullPtr_R, handle))
|
||||
}
|
||||
|
||||
# Return a verified to be valid handle out of either xgb.Booster.handle or
|
||||
# xgb.Booster internal utility function
|
||||
# Return a verified to be valid handle out of xgb.Booster
|
||||
# internal utility function
|
||||
xgb.get.handle <- function(object) {
|
||||
if (inherits(object, "xgb.Booster")) {
|
||||
handle <- object$handle
|
||||
} else if (inherits(object, "xgb.Booster.handle")) {
|
||||
handle <- object
|
||||
handle <- object$ptr
|
||||
if (is.null(handle) || !inherits(handle, "externalptr")) {
|
||||
stop("'xgb.Booster' object is corrupted or is from an incompatible xgboost version.")
|
||||
}
|
||||
} else {
|
||||
stop("argument must be of either xgb.Booster or xgb.Booster.handle class")
|
||||
stop("argument must be an 'xgb.Booster' object.")
|
||||
}
|
||||
if (is.null.handle(handle)) {
|
||||
stop("invalid xgb.Booster.handle")
|
||||
stop("invalid 'xgb.Booster' (blank 'externalptr').")
|
||||
}
|
||||
handle
|
||||
}
|
||||
|
||||
#' Restore missing parts of an incomplete xgb.Booster object
|
||||
#'
|
||||
#' It attempts to complete an `xgb.Booster` object by restoring either its missing
|
||||
#' raw model memory dump (when it has no `raw` data but its `xgb.Booster.handle` is valid)
|
||||
#' or its missing internal handle (when its `xgb.Booster.handle` is not valid
|
||||
#' but it has a raw Booster memory dump).
|
||||
#'
|
||||
#' @param object Object of class `xgb.Booster`.
|
||||
#' @param saveraw A flag indicating whether to append `raw` Booster memory dump data
|
||||
#' when it doesn't already exist.
|
||||
#'
|
||||
#' @details
|
||||
#'
|
||||
#' While this method is primarily for internal use, it might be useful in some practical situations.
|
||||
#'
|
||||
#' E.g., when an `xgb.Booster` model is saved as an R object and then is loaded as an R object,
|
||||
#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
|
||||
#' should still work for such a model object since those methods would be using
|
||||
#' `xgb.Booster.complete()` internally. However, one might find it to be more efficient to call the
|
||||
#' `xgb.Booster.complete()` function explicitly once after loading a model as an R-object.
|
||||
#' That would prevent further repeated implicit reconstruction of an internal booster model.
|
||||
#'
|
||||
#' @return
|
||||
#' An object of `xgb.Booster` class.
|
||||
#'
|
||||
#' @examples
|
||||
#'
|
||||
#' data(agaricus.train, package = "xgboost")
|
||||
#'
|
||||
#' bst <- xgboost(
|
||||
#' data = agaricus.train$data,
|
||||
#' label = agaricus.train$label,
|
||||
#' max_depth = 2,
|
||||
#' eta = 1,
|
||||
#' nthread = 2,
|
||||
#' nrounds = 2,
|
||||
#' objective = "binary:logistic"
|
||||
#' )
|
||||
#'
|
||||
#' fname <- file.path(tempdir(), "xgb_model.Rds")
|
||||
#' saveRDS(bst, fname)
|
||||
#'
|
||||
#' # Warning: The resulting RDS file is only compatible with the current XGBoost version.
|
||||
#' # Refer to the section titled "a-compatibility-note-for-saveRDS-save".
|
||||
#' bst1 <- readRDS(fname)
|
||||
#' # the handle is invalid:
|
||||
#' print(bst1$handle)
|
||||
#'
|
||||
#' bst1 <- xgb.Booster.complete(bst1)
|
||||
#' # now the handle points to a valid internal booster model:
|
||||
#' print(bst1$handle)
|
||||
#'
|
||||
#' @export
|
||||
xgb.Booster.complete <- function(object, saveraw = TRUE) {
|
||||
if (!inherits(object, "xgb.Booster"))
|
||||
stop("argument type must be xgb.Booster")
|
||||
|
||||
if (is.null.handle(object$handle)) {
|
||||
object$handle <- xgb.Booster.handle(
|
||||
params = list(),
|
||||
cachelist = list(),
|
||||
modelfile = object$raw,
|
||||
handle = object$handle
|
||||
)
|
||||
} else {
|
||||
if (is.null(object$raw) && saveraw) {
|
||||
object$raw <- xgb.serialize(object$handle)
|
||||
}
|
||||
}
|
||||
|
||||
attrs <- xgb.attributes(object)
|
||||
if (!is.null(attrs$best_ntreelimit)) {
|
||||
object$best_ntreelimit <- as.integer(attrs$best_ntreelimit)
|
||||
}
|
||||
if (!is.null(attrs$best_iteration)) {
|
||||
## Convert from 0 based back to 1 based.
|
||||
object$best_iteration <- as.integer(attrs$best_iteration) + 1
|
||||
}
|
||||
if (!is.null(attrs$best_score)) {
|
||||
object$best_score <- as.numeric(attrs$best_score)
|
||||
}
|
||||
if (!is.null(attrs$best_msg)) {
|
||||
object$best_msg <- attrs$best_msg
|
||||
}
|
||||
if (!is.null(attrs$niter)) {
|
||||
object$niter <- as.integer(attrs$niter)
|
||||
}
|
||||
|
||||
return(object)
|
||||
return(handle)
|
||||
}
|
||||
|
||||
#' Predict method for XGBoost model
|
||||
#'
|
||||
#' Predicted values based on either xgboost model or model handle object.
|
||||
#'
|
||||
#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`.
|
||||
#' @param object Object of class `xgb.Booster`.
|
||||
#' @param newdata Takes `matrix`, `dgCMatrix`, `dgRMatrix`, `dsparseVector`,
|
||||
#' local data file, or `xgb.DMatrix`.
|
||||
#' For single-row predictions on sparse data, it is recommended to use the CSR format.
|
||||
@@ -358,27 +263,19 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
|
||||
#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
|
||||
#' sum(pred5 != lb) / length(lb)
|
||||
#'
|
||||
#' @rdname predict.xgb.Booster
|
||||
#' @export
|
||||
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
|
||||
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
|
||||
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
|
||||
object <- xgb.Booster.complete(object, saveraw = FALSE)
|
||||
|
||||
if (!inherits(newdata, "xgb.DMatrix")) {
|
||||
config <- jsonlite::fromJSON(xgb.config(object))
|
||||
nthread <- strtoi(config$learner$generic_param$nthread)
|
||||
nthread <- xgb.nthread(object)
|
||||
newdata <- xgb.DMatrix(
|
||||
newdata,
|
||||
missing = missing, nthread = NVL(nthread, -1)
|
||||
)
|
||||
}
|
||||
if (!is.null(object[["feature_names"]]) &&
|
||||
!is.null(colnames(newdata)) &&
|
||||
!identical(object[["feature_names"]], colnames(newdata)))
|
||||
stop("Feature names stored in `object` and `newdata` are different!")
|
||||
|
||||
if (NVL(object$params[['booster']], '') == 'gblinear' || is.null(ntreelimit))
|
||||
if (NVL(xgb.booster_type(object), '') == 'gblinear' || is.null(ntreelimit))
|
||||
ntreelimit <- 0
|
||||
|
||||
if (ntreelimit != 0 && is.null(iterationrange)) {
|
||||
@@ -391,11 +288,12 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
## both are specified, let libgxgboost throw an error
|
||||
} else {
|
||||
## no limit is supplied, use best
|
||||
if (is.null(object$best_iteration)) {
|
||||
best_iteration <- xgb.best_iteration(object)
|
||||
if (is.null(best_iteration)) {
|
||||
iterationrange <- c(0, 0)
|
||||
} else {
|
||||
## We don't need to + 1 as R is 1-based index.
|
||||
iterationrange <- c(0, as.integer(object$best_iteration))
|
||||
iterationrange <- c(0, as.integer(best_iteration))
|
||||
}
|
||||
}
|
||||
## Handle the 0 length values.
|
||||
@@ -438,7 +336,10 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
}
|
||||
|
||||
predts <- .Call(
|
||||
XGBoosterPredictFromDMatrix_R, object$handle, newdata, jsonlite::toJSON(args, auto_unbox = TRUE)
|
||||
XGBoosterPredictFromDMatrix_R,
|
||||
xgb.get.handle(object),
|
||||
newdata,
|
||||
jsonlite::toJSON(args, auto_unbox = TRUE)
|
||||
)
|
||||
names(predts) <- c("shape", "results")
|
||||
shape <- predts$shape
|
||||
@@ -509,22 +410,12 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
return(arr)
|
||||
}
|
||||
|
||||
#' @rdname predict.xgb.Booster
|
||||
#' @export
|
||||
predict.xgb.Booster.handle <- function(object, ...) {
|
||||
|
||||
bst <- xgb.handleToBooster(handle = object, raw = NULL)
|
||||
|
||||
ret <- predict(bst, ...)
|
||||
return(ret)
|
||||
}
|
||||
|
||||
|
||||
#' Accessors for serializable attributes of a model
|
||||
#' @title Accessors for serializable attributes of a model
|
||||
#'
|
||||
#' These methods allow to manipulate the key-value attribute strings of an xgboost model.
|
||||
#' @description These methods allow to manipulate the key-value attribute strings of an xgboost model.
|
||||
#'
|
||||
#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`.
|
||||
#' @param object Object of class `xgb.Booster`. \bold{Will be modified in-place} when assigning to it.
|
||||
#' @param name A non-empty character string specifying which attribute is to be accessed.
|
||||
#' @param value For `xgb.attr<-`, a value of an attribute; for `xgb.attributes<-`,
|
||||
#' it is a list (or an object coercible to a list) with the names of attributes to set
|
||||
@@ -546,16 +437,15 @@ predict.xgb.Booster.handle <- function(object, ...) {
|
||||
#' change the value of that parameter for a model.
|
||||
#' Use [xgb.parameters<-()] to set or change model parameters.
|
||||
#'
|
||||
#' The attribute setters would usually work more efficiently for `xgb.Booster.handle`
|
||||
#' than for `xgb.Booster`, since only just a handle (pointer) would need to be copied.
|
||||
#' That would only matter if attributes need to be set many times.
|
||||
#' Note, however, that when feeding a handle of an `xgb.Booster` object to the attribute setters,
|
||||
#' the raw model cache of an `xgb.Booster` object would not be automatically updated,
|
||||
#' and it would be the user's responsibility to call [xgb.serialize()] to update it.
|
||||
#'
|
||||
#' The `xgb.attributes<-` setter either updates the existing or adds one or several attributes,
|
||||
#' but it doesn't delete the other existing attributes.
|
||||
#'
|
||||
#' Important: since this modifies the booster's C object, semantics for assignment here
|
||||
#' will differ from R's, as any object reference to the same booster will be modified
|
||||
#' too, while assignment of R attributes through `attributes(model)$<attr> <- <value>`
|
||||
#' will follow the usual copy-on-write R semantics (see \link{xgb.copy.Booster} for an
|
||||
#' example of these behaviors).
|
||||
#'
|
||||
#' @return
|
||||
#' - `xgb.attr()` returns either a string value of an attribute
|
||||
#' or `NULL` if an attribute wasn't stored in a model.
|
||||
@@ -597,14 +487,25 @@ predict.xgb.Booster.handle <- function(object, ...) {
|
||||
xgb.attr <- function(object, name) {
|
||||
if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name")
|
||||
handle <- xgb.get.handle(object)
|
||||
.Call(XGBoosterGetAttr_R, handle, as.character(name[1]))
|
||||
out <- .Call(XGBoosterGetAttr_R, handle, as.character(name[1]))
|
||||
if (!NROW(out) || !nchar(out)) {
|
||||
return(NULL)
|
||||
}
|
||||
if (!is.null(out)) {
|
||||
if (name %in% c("best_iteration", "best_ntreelimit", "best_score")) {
|
||||
out <- as.numeric(out)
|
||||
}
|
||||
}
|
||||
return(out)
|
||||
}
|
||||
|
||||
#' @rdname xgb.attr
|
||||
#' @export
|
||||
`xgb.attr<-` <- function(object, name, value) {
|
||||
if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name")
|
||||
name <- as.character(name[1])
|
||||
if (!NROW(name) || !nchar(name)) stop("invalid attribute name")
|
||||
handle <- xgb.get.handle(object)
|
||||
|
||||
if (!is.null(value)) {
|
||||
# Coerce the elements to be scalar strings.
|
||||
# Q: should we warn user about non-scalar elements?
|
||||
@@ -614,11 +515,8 @@ xgb.attr <- function(object, name) {
|
||||
value <- as.character(value[1])
|
||||
}
|
||||
}
|
||||
.Call(XGBoosterSetAttr_R, handle, as.character(name[1]), value)
|
||||
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
|
||||
object$raw <- xgb.serialize(object$handle)
|
||||
}
|
||||
object
|
||||
.Call(XGBoosterSetAttr_R, handle, name, value)
|
||||
return(object)
|
||||
}
|
||||
|
||||
#' @rdname xgb.attr
|
||||
@@ -626,12 +524,10 @@ xgb.attr <- function(object, name) {
|
||||
xgb.attributes <- function(object) {
|
||||
handle <- xgb.get.handle(object)
|
||||
attr_names <- .Call(XGBoosterGetAttrNames_R, handle)
|
||||
if (is.null(attr_names)) return(NULL)
|
||||
res <- lapply(attr_names, function(x) {
|
||||
.Call(XGBoosterGetAttr_R, handle, x)
|
||||
})
|
||||
names(res) <- attr_names
|
||||
res
|
||||
if (!NROW(attr_names)) return(list())
|
||||
out <- lapply(attr_names, function(name) xgb.attr(object, name))
|
||||
names(out) <- attr_names
|
||||
return(out)
|
||||
}
|
||||
|
||||
#' @rdname xgb.attr
|
||||
@@ -641,31 +537,21 @@ xgb.attributes <- function(object) {
|
||||
if (is.null(names(a)) || any(nchar(names(a)) == 0)) {
|
||||
stop("attribute names cannot be empty strings")
|
||||
}
|
||||
# Coerce the elements to be scalar strings.
|
||||
# Q: should we warn a user about non-scalar elements?
|
||||
a <- lapply(a, function(x) {
|
||||
if (is.null(x)) return(NULL)
|
||||
if (is.numeric(x[1])) {
|
||||
format(x[1], digits = 17)
|
||||
} else {
|
||||
as.character(x[1])
|
||||
}
|
||||
})
|
||||
handle <- xgb.get.handle(object)
|
||||
for (i in seq_along(a)) {
|
||||
.Call(XGBoosterSetAttr_R, handle, names(a[i]), a[[i]])
|
||||
xgb.attr(object, names(a[i])) <- a[[i]]
|
||||
}
|
||||
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
|
||||
object$raw <- xgb.serialize(object$handle)
|
||||
}
|
||||
object
|
||||
return(object)
|
||||
}
|
||||
|
||||
#' Accessors for model parameters as JSON string
|
||||
#'
|
||||
#' @param object Object of class `xgb.Booster`.
|
||||
#' @param value A JSON string.
|
||||
#' @title Accessors for model parameters as JSON string
|
||||
#' @details Note that assignment is performed in-place on the booster C object, which unlike assignment
|
||||
#' of R attributes, doesn't follow typical copy-on-write semantics for assignment - i.e. all references
|
||||
#' to the same booster will also get updated.
|
||||
#'
|
||||
#' See \link{xgb.copy.Booster} for an example of this behavior.
|
||||
#' @param object Object of class `xgb.Booster`. \bold{Will be modified in-place} when assigning to it.
|
||||
#' @param value An R list.
|
||||
#' @return `xgb.config` will return the parameters as an R list.
|
||||
#' @examples
|
||||
#' data(agaricus.train, package = "xgboost")
|
||||
#'
|
||||
@@ -690,31 +576,36 @@ xgb.attributes <- function(object) {
|
||||
#' @export
|
||||
xgb.config <- function(object) {
|
||||
handle <- xgb.get.handle(object)
|
||||
.Call(XGBoosterSaveJsonConfig_R, handle)
|
||||
return(jsonlite::fromJSON(.Call(XGBoosterSaveJsonConfig_R, handle)))
|
||||
}
|
||||
|
||||
#' @rdname xgb.config
|
||||
#' @export
|
||||
`xgb.config<-` <- function(object, value) {
|
||||
handle <- xgb.get.handle(object)
|
||||
.Call(XGBoosterLoadJsonConfig_R, handle, value)
|
||||
object$raw <- NULL # force renew the raw buffer
|
||||
object <- xgb.Booster.complete(object)
|
||||
object
|
||||
.Call(
|
||||
XGBoosterLoadJsonConfig_R,
|
||||
handle,
|
||||
jsonlite::toJSON(value, auto_unbox = TRUE, null = "null")
|
||||
)
|
||||
return(object)
|
||||
}
|
||||
|
||||
#' Accessors for model parameters
|
||||
#' @title Accessors for model parameters
|
||||
#' @description Only the setter for xgboost parameters is currently implemented.
|
||||
#' @details Just like \link{xgb.attr}, this function will make in-place modifications
|
||||
#' on the booster object which do not follow typical R assignment semantics - that is,
|
||||
#' all references to the same booster will also be updated, unlike assingment of R
|
||||
#' attributes which follow copy-on-write semantics.
|
||||
#'
|
||||
#' Only the setter for xgboost parameters is currently implemented.
|
||||
#' See \link{xgb.copy.Booster} for an example of this behavior.
|
||||
#'
|
||||
#' @param object Object of class `xgb.Booster` or `xgb.Booster.handle`.
|
||||
#' Be aware that setting parameters of a fitted booster related to training continuation / updates
|
||||
#' will reset its number of rounds indicator to zero.
|
||||
#' @param object Object of class `xgb.Booster`. \bold{Will be modified in-place}.
|
||||
#' @param value A list (or an object coercible to a list) with the names of parameters to set
|
||||
#' and the elements corresponding to parameter values.
|
||||
#'
|
||||
#' @details
|
||||
#' Note that the setter would usually work more efficiently for `xgb.Booster.handle`
|
||||
#' than for `xgb.Booster`, since only just a handle would need to be copied.
|
||||
#'
|
||||
#' @return The same booster `object`, which gets modified in-place.
|
||||
#' @examples
|
||||
#' data(agaricus.train, package = "xgboost")
|
||||
#' train <- agaricus.train
|
||||
@@ -751,28 +642,301 @@ xgb.config <- function(object) {
|
||||
for (i in seq_along(p)) {
|
||||
.Call(XGBoosterSetParam_R, handle, names(p[i]), p[[i]])
|
||||
}
|
||||
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
|
||||
object$raw <- xgb.serialize(object$handle)
|
||||
return(object)
|
||||
}
|
||||
|
||||
#' @rdname getinfo
|
||||
#' @export
|
||||
getinfo.xgb.Booster <- function(object, name) {
|
||||
name <- as.character(head(name, 1L))
|
||||
allowed_fields <- c("feature_name", "feature_type")
|
||||
if (!(name %in% allowed_fields)) {
|
||||
stop("getinfo: name must be one of the following: ", paste(allowed_fields, collapse = ", "))
|
||||
}
|
||||
object
|
||||
handle <- xgb.get.handle(object)
|
||||
out <- .Call(
|
||||
XGBoosterGetStrFeatureInfo_R,
|
||||
handle,
|
||||
name
|
||||
)
|
||||
if (!NROW(out)) {
|
||||
return(NULL)
|
||||
}
|
||||
return(out)
|
||||
}
|
||||
|
||||
#' @rdname getinfo
|
||||
#' @export
|
||||
setinfo.xgb.Booster <- function(object, name, info) {
|
||||
name <- as.character(head(name, 1L))
|
||||
allowed_fields <- c("feature_name", "feature_type")
|
||||
if (!(name %in% allowed_fields)) {
|
||||
stop("setinfo: unknown info name ", name)
|
||||
}
|
||||
info <- as.character(info)
|
||||
handle <- xgb.get.handle(object)
|
||||
.Call(
|
||||
XGBoosterSetStrFeatureInfo_R,
|
||||
handle,
|
||||
name,
|
||||
info
|
||||
)
|
||||
return(TRUE)
|
||||
}
|
||||
|
||||
#' @title Get number of boosting in a fitted booster
|
||||
#' @param model A fitted `xgb.Booster` model.
|
||||
#' @return The number of rounds saved in the model, as an integer.
|
||||
#' @details Note that setting booster parameters related to training
|
||||
#' continuation / updates through \link{xgb.parameters<-} will reset the
|
||||
#' number of rounds to zero.
|
||||
#' @export
|
||||
xgb.get.num.boosted.rounds <- function(model) {
|
||||
return(.Call(XGBoosterBoostedRounds_R, xgb.get.handle(model)))
|
||||
}
|
||||
|
||||
#' @title Get Features Names from Booster
|
||||
#' @description Returns the feature / variable / column names from a fitted
|
||||
#' booster object, which are set automatically during the call to \link{xgb.train}
|
||||
#' from the DMatrix names, or which can be set manually through \link{setinfo}.
|
||||
#'
|
||||
#' If the object doesn't have feature names, will return `NULL`.
|
||||
#'
|
||||
#' It is equivalent to calling `getinfo(object, "feature_name")`.
|
||||
#' @param object An `xgb.Booster` object.
|
||||
#' @param ... Not used.
|
||||
#' @export
|
||||
variable.names.xgb.Booster <- function(object, ...) {
|
||||
return(getinfo(object, "feature_name"))
|
||||
}
|
||||
|
||||
# Extract the number of trees in a model.
|
||||
# TODO: either add a getter to C-interface, or simply set an 'ntree' attribute after each iteration.
|
||||
# internal utility function
|
||||
xgb.ntree <- function(bst) {
|
||||
length(grep('^booster', xgb.dump(bst)))
|
||||
config <- xgb.config(bst)
|
||||
out <- strtoi(config$learner$gradient_booster$gbtree_model_param$num_trees)
|
||||
return(out)
|
||||
}
|
||||
|
||||
xgb.nthread <- function(bst) {
|
||||
config <- xgb.config(bst)
|
||||
out <- strtoi(config$learner$generic_param$nthread)
|
||||
return(out)
|
||||
}
|
||||
|
||||
#' Print xgb.Booster
|
||||
xgb.booster_type <- function(bst) {
|
||||
config <- xgb.config(bst)
|
||||
out <- config$learner$learner_train_param$booster
|
||||
return(out)
|
||||
}
|
||||
|
||||
xgb.num_class <- function(bst) {
|
||||
config <- xgb.config(bst)
|
||||
out <- strtoi(config$learner$learner_model_param$num_class)
|
||||
return(out)
|
||||
}
|
||||
|
||||
xgb.feature_names <- function(bst) {
|
||||
return(getinfo(bst, "feature_name"))
|
||||
}
|
||||
|
||||
xgb.feature_types <- function(bst) {
|
||||
return(getinfo(bst, "feature_type"))
|
||||
}
|
||||
|
||||
xgb.num_feature <- function(bst) {
|
||||
handle <- xgb.get.handle(bst)
|
||||
return(.Call(XGBoosterGetNumFeature_R, handle))
|
||||
}
|
||||
|
||||
xgb.best_iteration <- function(bst) {
|
||||
out <- xgb.attr(bst, "best_iteration")
|
||||
if (!NROW(out) || !nchar(out)) {
|
||||
out <- NULL
|
||||
}
|
||||
return(out)
|
||||
}
|
||||
|
||||
#' @title Extract coefficients from linear booster
|
||||
#' @description Extracts the coefficients from a 'gblinear' booster object,
|
||||
#' as produced by \code{xgb.train} when using parameter `booster="gblinear"`.
|
||||
#'
|
||||
#' Print information about `xgb.Booster`.
|
||||
#' Note: this function will error out if passing a booster model
|
||||
#' which is not of "gblinear" type.
|
||||
#' @param object A fitted booster of 'gblinear' type.
|
||||
#' @param ... Not used.
|
||||
#' @return The extracted coefficients:\itemize{
|
||||
#' \item If there's only one coefficient per column in the data, will be returned as a
|
||||
#' vector, potentially containing the feature names if available, with the intercept
|
||||
#' as first column.
|
||||
#' \item If there's more than one coefficient per column in the data (e.g. when using
|
||||
#' `objective="multi:softmax"`), will be returned as a matrix with dimensions equal
|
||||
#' to `[num_features, num_cols]`, with the intercepts as first row. Note that the column
|
||||
#' (classes in multi-class classification) dimension will not be named.
|
||||
#' }
|
||||
#'
|
||||
#' The intercept returned here will include the 'base_score' parameter (unlike the 'bias'
|
||||
#' or the last coefficient in the model dump, which doesn't have 'base_score' added to it),
|
||||
#' hence one should get the same values from calling `predict(..., outputmargin = TRUE)` and
|
||||
#' from performing a matrix multiplication with `model.matrix(~., ...)`.
|
||||
#'
|
||||
#' Be aware that the coefficients are obtained by first converting them to strings and
|
||||
#' back, so there will always be some very small lose of precision compared to the actual
|
||||
#' coefficients as used by \link{predict.xgb.Booster}.
|
||||
#' @examples
|
||||
#' library(xgboost)
|
||||
#' data(mtcars)
|
||||
#' y <- mtcars[, 1]
|
||||
#' x <- as.matrix(mtcars[, -1])
|
||||
#' dm <- xgb.DMatrix(data = x, label = y, nthread = 1)
|
||||
#' params <- list(booster = "gblinear", nthread = 1)
|
||||
#' model <- xgb.train(data = dm, params = params, nrounds = 2)
|
||||
#' coef(model)
|
||||
#' @export
|
||||
coef.xgb.Booster <- function(object, ...) {
|
||||
booster_type <- xgb.booster_type(object)
|
||||
if (booster_type != "gblinear") {
|
||||
stop("Coefficients are not defined for Booster type ", booster_type)
|
||||
}
|
||||
model_json <- jsonlite::fromJSON(rawToChar(xgb.save.raw(object, raw_format = "json")))
|
||||
base_score <- model_json$learner$learner_model_param$base_score
|
||||
num_feature <- as.numeric(model_json$learner$learner_model_param$num_feature)
|
||||
|
||||
weights <- model_json$learner$gradient_booster$model$weights
|
||||
n_cols <- length(weights) / (num_feature + 1)
|
||||
if (n_cols != floor(n_cols) || n_cols < 1) {
|
||||
stop("Internal error: could not determine shape of coefficients.")
|
||||
}
|
||||
sep <- num_feature * n_cols
|
||||
coefs <- weights[seq(1, sep)]
|
||||
intercepts <- weights[seq(sep + 1, length(weights))]
|
||||
intercepts <- intercepts + as.numeric(base_score)
|
||||
|
||||
feature_names <- xgb.feature_names(object)
|
||||
if (!NROW(feature_names)) {
|
||||
# This mimics the default naming in R which names columns as "V1..N"
|
||||
# when names are needed but not available
|
||||
feature_names <- paste0("V", seq(1L, num_feature))
|
||||
}
|
||||
feature_names <- c("(Intercept)", feature_names)
|
||||
if (n_cols == 1L) {
|
||||
out <- c(intercepts, coefs)
|
||||
names(out) <- feature_names
|
||||
} else {
|
||||
coefs <- matrix(coefs, nrow = num_feature, byrow = TRUE)
|
||||
dim(intercepts) <- c(1L, n_cols)
|
||||
out <- rbind(intercepts, coefs)
|
||||
row.names(out) <- feature_names
|
||||
# TODO: if a class names attributes is added,
|
||||
# should use those names here.
|
||||
}
|
||||
return(out)
|
||||
}
|
||||
|
||||
#' @title Deep-copies a Booster Object
|
||||
#' @description Creates a deep copy of an 'xgb.Booster' object, such that the
|
||||
#' C object pointer contained will be a different object, and hence functions
|
||||
#' like \link{xgb.attr} will not affect the object from which it was copied.
|
||||
#' @param model An 'xgb.Booster' object.
|
||||
#' @return A deep copy of `model` - it will be identical in every way, but C-level
|
||||
#' functions called on that copy will not affect the `model` variable.
|
||||
#' @examples
|
||||
#' library(xgboost)
|
||||
#' data(mtcars)
|
||||
#' y <- mtcars$mpg
|
||||
#' x <- mtcars[, -1]
|
||||
#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
#' model <- xgb.train(
|
||||
#' data = dm,
|
||||
#' params = list(nthread = 1),
|
||||
#' nround = 3
|
||||
#' )
|
||||
#'
|
||||
#' # Set an arbitrary attribute kept at the C level
|
||||
#' xgb.attr(model, "my_attr") <- 100
|
||||
#' print(xgb.attr(model, "my_attr"))
|
||||
#'
|
||||
#' # Just assigning to a new variable will not create
|
||||
#' # a deep copy - C object pointer is shared, and in-place
|
||||
#' # modifications will affect both objects
|
||||
#' model_shallow_copy <- model
|
||||
#' xgb.attr(model_shallow_copy, "my_attr") <- 333
|
||||
#' # 'model' was also affected by this change:
|
||||
#' print(xgb.attr(model, "my_attr"))
|
||||
#'
|
||||
#' model_deep_copy <- xgb.copy.Booster(model)
|
||||
#' xgb.attr(model_deep_copy, "my_attr") <- 444
|
||||
#' # 'model' was NOT affected by this change
|
||||
#' # (keeps previous value that was assigned before)
|
||||
#' print(xgb.attr(model, "my_attr"))
|
||||
#'
|
||||
#' # Verify that the new object was actually modified
|
||||
#' print(xgb.attr(model_deep_copy, "my_attr"))
|
||||
#' @export
|
||||
xgb.copy.Booster <- function(model) {
|
||||
if (!inherits(model, "xgb.Booster")) {
|
||||
stop("'model' must be an 'xgb.Booster' object.")
|
||||
}
|
||||
return(.Call(XGDuplicate_R, model))
|
||||
}
|
||||
|
||||
#' @title Check if two boosters share the same C object
|
||||
#' @description Checks whether two booster objects refer to the same underlying C object.
|
||||
#' @details As booster objects (as returned by e.g. \link{xgb.train}) contain an R 'externalptr'
|
||||
#' object, they don't follow typical copy-on-write semantics of other R objects - that is, if
|
||||
#' one assigns a booster to a different variable and modifies that new variable through in-place
|
||||
#' methods like \link{xgb.attr<-}, the modification will be applied to both the old and the new
|
||||
#' variable, unlike typical R assignments which would only modify the latter.
|
||||
#'
|
||||
#' This function allows checking whether two booster objects share the same 'externalptr',
|
||||
#' regardless of the R attributes that they might have.
|
||||
#'
|
||||
#' In order to duplicate a booster in such a way that the copy wouldn't share the same
|
||||
#' 'externalptr', one can use function \link{xgb.copy.Booster}.
|
||||
#' @param obj1 Booster model to compare with `obj2`.
|
||||
#' @param obj2 Booster model to compare with `obj1`.
|
||||
#' @return Either `TRUE` or `FALSE` according to whether the two boosters share
|
||||
#' the underlying C object.
|
||||
#' @seealso \link{xgb.copy.Booster}
|
||||
#' @examples
|
||||
#' library(xgboost)
|
||||
#' data(mtcars)
|
||||
#' y <- mtcars$mpg
|
||||
#' x <- as.matrix(mtcars[, -1])
|
||||
#' model <- xgb.train(
|
||||
#' params = list(nthread = 1),
|
||||
#' data = xgb.DMatrix(x, label = y, nthread = 1),
|
||||
#' nround = 3
|
||||
#' )
|
||||
#'
|
||||
#' model_shallow_copy <- model
|
||||
#' xgb.is.same.Booster(model, model_shallow_copy) # same C object
|
||||
#'
|
||||
#' model_deep_copy <- xgb.copy.Booster(model)
|
||||
#' xgb.is.same.Booster(model, model_deep_copy) # different C objects
|
||||
#'
|
||||
#' # In-place assignments modify all references,
|
||||
#' # but not full/deep copies of the booster
|
||||
#' xgb.attr(model_shallow_copy, "my_attr") <- 111
|
||||
#' xgb.attr(model, "my_attr") # gets modified
|
||||
#' xgb.attr(model_deep_copy, "my_attr") # doesn't get modified
|
||||
#' @export
|
||||
xgb.is.same.Booster <- function(obj1, obj2) {
|
||||
if (!inherits(obj1, "xgb.Booster") || !inherits(obj2, "xgb.Booster")) {
|
||||
stop("'xgb.is.same.Booster' is only applicable to 'xgb.Booster' objects.")
|
||||
}
|
||||
return(
|
||||
.Call(
|
||||
XGPointerEqComparison_R,
|
||||
xgb.get.handle(obj1),
|
||||
xgb.get.handle(obj2)
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
#' @title Print xgb.Booster
|
||||
#' @description Print information about `xgb.Booster`.
|
||||
#' @param x An `xgb.Booster` object.
|
||||
#' @param verbose Whether to print detailed data (e.g., attribute values).
|
||||
#' @param ... Not currently used.
|
||||
#'
|
||||
#' @param ... Not used.
|
||||
#' @return The same `x` object, returned invisibly
|
||||
#' @examples
|
||||
#' data(agaricus.train, package = "xgboost")
|
||||
#' train <- agaricus.train
|
||||
@@ -790,79 +954,40 @@ xgb.ntree <- function(bst) {
|
||||
#' attr(bst, "myattr") <- "memo"
|
||||
#'
|
||||
#' print(bst)
|
||||
#' print(bst, verbose = TRUE)
|
||||
#'
|
||||
#' @export
|
||||
print.xgb.Booster <- function(x, verbose = FALSE, ...) {
|
||||
print.xgb.Booster <- function(x, ...) {
|
||||
# this lets it error out when the object comes from an earlier R xgboost version
|
||||
handle <- xgb.get.handle(x)
|
||||
cat('##### xgb.Booster\n')
|
||||
|
||||
valid_handle <- !is.null.handle(x$handle)
|
||||
if (!valid_handle)
|
||||
cat("Handle is invalid! Suggest using xgb.Booster.complete\n")
|
||||
|
||||
cat('raw: ')
|
||||
if (!is.null(x$raw)) {
|
||||
cat(format(object.size(x$raw), units = "auto"), '\n')
|
||||
} else {
|
||||
cat('NULL\n')
|
||||
}
|
||||
if (!is.null(x$call)) {
|
||||
R_attrs <- attributes(x)
|
||||
if (!is.null(R_attrs$call)) {
|
||||
cat('call:\n ')
|
||||
print(x$call)
|
||||
print(R_attrs$call)
|
||||
}
|
||||
|
||||
if (!is.null(x$params)) {
|
||||
cat('params (as set within xgb.train):\n')
|
||||
cat(' ',
|
||||
paste(names(x$params),
|
||||
paste0('"', unlist(x$params), '"'),
|
||||
sep = ' = ', collapse = ', '), '\n', sep = '')
|
||||
}
|
||||
# TODO: need an interface to access all the xgboosts parameters
|
||||
cat('# of features:', xgb.num_feature(x), '\n')
|
||||
cat('# of rounds: ', xgb.get.num.boosted.rounds(x), '\n')
|
||||
|
||||
attrs <- character(0)
|
||||
if (valid_handle)
|
||||
attrs <- xgb.attributes(x)
|
||||
if (length(attrs) > 0) {
|
||||
attr_names <- .Call(XGBoosterGetAttrNames_R, handle)
|
||||
if (NROW(attr_names)) {
|
||||
cat('xgb.attributes:\n')
|
||||
if (verbose) {
|
||||
cat(paste(paste0(' ', names(attrs)),
|
||||
paste0('"', unlist(attrs), '"'),
|
||||
sep = ' = ', collapse = '\n'), '\n', sep = '')
|
||||
} else {
|
||||
cat(' ', paste(names(attrs), collapse = ', '), '\n', sep = '')
|
||||
}
|
||||
cat(" ", paste(attr_names, collapse = ", "), "\n")
|
||||
}
|
||||
|
||||
if (!is.null(x$callbacks) && length(x$callbacks) > 0) {
|
||||
if (!is.null(R_attrs$callbacks) && length(R_attrs$callbacks) > 0) {
|
||||
cat('callbacks:\n')
|
||||
lapply(callback.calls(x$callbacks), function(x) {
|
||||
lapply(callback.calls(R_attrs$callbacks), function(x) {
|
||||
cat(' ')
|
||||
print(x)
|
||||
})
|
||||
}
|
||||
|
||||
if (!is.null(x$feature_names))
|
||||
cat('# of features:', length(x$feature_names), '\n')
|
||||
|
||||
cat('niter: ', x$niter, '\n', sep = '')
|
||||
# TODO: uncomment when faster xgb.ntree is implemented
|
||||
#cat('ntree: ', xgb.ntree(x), '\n', sep='')
|
||||
|
||||
for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks',
|
||||
'evaluation_log', 'niter', 'feature_names'))) {
|
||||
if (is.atomic(x[[n]])) {
|
||||
cat(n, ':', x[[n]], '\n', sep = ' ')
|
||||
} else {
|
||||
cat(n, ':\n\t', sep = ' ')
|
||||
print(x[[n]])
|
||||
}
|
||||
}
|
||||
|
||||
if (!is.null(x$evaluation_log)) {
|
||||
if (!is.null(R_attrs$evaluation_log)) {
|
||||
cat('evaluation_log:\n')
|
||||
print(x$evaluation_log, row.names = FALSE, topn = 2)
|
||||
print(R_attrs$evaluation_log, row.names = FALSE, topn = 2)
|
||||
}
|
||||
|
||||
invisible(x)
|
||||
return(invisible(x))
|
||||
}
|
||||
|
||||
@@ -335,14 +335,13 @@ dimnames.xgb.DMatrix <- function(x) {
|
||||
}
|
||||
|
||||
|
||||
#' Get information of an xgb.DMatrix object
|
||||
#'
|
||||
#' Get information of an xgb.DMatrix object
|
||||
#' @param object Object of class \code{xgb.DMatrix}
|
||||
#' @title Get or set information of xgb.DMatrix and xgb.Booster objects
|
||||
#' @param object Object of class \code{xgb.DMatrix} of `xgb.Booster`.
|
||||
#' @param name the name of the information field to get (see details)
|
||||
#'
|
||||
#' @return For `getinfo`, will return the requested field. For `setinfo`, will always return value `TRUE`
|
||||
#' if it succeeds.
|
||||
#' @details
|
||||
#' The \code{name} field can be one of the following:
|
||||
#' The \code{name} field can be one of the following for `xgb.DMatrix`:
|
||||
#'
|
||||
#' \itemize{
|
||||
#' \item \code{label}
|
||||
@@ -357,9 +356,17 @@ dimnames.xgb.DMatrix <- function(x) {
|
||||
#' }
|
||||
#' See the documentation for \link{xgb.DMatrix} for more information about these fields.
|
||||
#'
|
||||
#' For `xgb.Booster`, can be one of the following:
|
||||
#' \itemize{
|
||||
#' \item \code{feature_type}
|
||||
#' \item \code{feature_name}
|
||||
#' }
|
||||
#'
|
||||
#' Note that, while 'qid' cannot be retrieved, it's possible to get the equivalent 'group'
|
||||
#' for a DMatrix that had 'qid' assigned.
|
||||
#'
|
||||
#' \bold{Important}: when calling `setinfo`, the objects are modified in-place. See
|
||||
#' \link{xgb.copy.Booster} for an idea of this in-place assignment works.
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
|
||||
@@ -412,13 +419,7 @@ getinfo.xgb.DMatrix <- function(object, name) {
|
||||
return(ret)
|
||||
}
|
||||
|
||||
|
||||
#' Set information of an xgb.DMatrix object
|
||||
#'
|
||||
#' Set information of an xgb.DMatrix object
|
||||
#'
|
||||
#' @param object Object of class "xgb.DMatrix"
|
||||
#' @param name the name of the field to get
|
||||
#' @rdname getinfo
|
||||
#' @param info the specific field of information to set
|
||||
#'
|
||||
#' @details
|
||||
@@ -441,11 +442,10 @@ getinfo.xgb.DMatrix <- function(object, name) {
|
||||
#' setinfo(dtrain, 'label', 1-labels)
|
||||
#' labels2 <- getinfo(dtrain, 'label')
|
||||
#' stopifnot(all.equal(labels2, 1-labels))
|
||||
#' @rdname setinfo
|
||||
#' @export
|
||||
setinfo <- function(object, name, info) UseMethod("setinfo")
|
||||
|
||||
#' @rdname setinfo
|
||||
#' @rdname getinfo
|
||||
#' @export
|
||||
setinfo.xgb.DMatrix <- function(object, name, info) {
|
||||
.internal.setinfo.xgb.DMatrix(object, name, info)
|
||||
|
||||
@@ -204,13 +204,13 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
|
||||
dtrain <- slice(dall, unlist(folds[-k]))
|
||||
else
|
||||
dtrain <- slice(dall, train_folds[[k]])
|
||||
handle <- xgb.Booster.handle(
|
||||
bst <- xgb.Booster(
|
||||
params = params,
|
||||
cachelist = list(dtrain, dtest),
|
||||
modelfile = NULL,
|
||||
handle = NULL
|
||||
modelfile = NULL
|
||||
)
|
||||
list(dtrain = dtrain, bst = handle, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
|
||||
bst <- bst$bst
|
||||
list(dtrain = dtrain, bst = bst, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
|
||||
})
|
||||
rm(dall)
|
||||
# a "basket" to collect some results from callbacks
|
||||
@@ -231,13 +231,13 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
|
||||
|
||||
msg <- lapply(bst_folds, function(fd) {
|
||||
xgb.iter.update(
|
||||
booster_handle = fd$bst,
|
||||
bst = fd$bst,
|
||||
dtrain = fd$dtrain,
|
||||
iter = iteration - 1,
|
||||
obj = obj
|
||||
)
|
||||
xgb.iter.eval(
|
||||
booster_handle = fd$bst,
|
||||
bst = fd$bst,
|
||||
watchlist = fd$watchlist,
|
||||
iter = iteration - 1,
|
||||
feval = feval
|
||||
@@ -267,7 +267,7 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
|
||||
ret <- c(ret, basket)
|
||||
|
||||
class(ret) <- 'xgb.cv.synchronous'
|
||||
invisible(ret)
|
||||
return(invisible(ret))
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -56,9 +56,13 @@ xgb.dump <- function(model, fname = NULL, fmap = "", with_stats = FALSE,
|
||||
if (!(is.null(fmap) || is.character(fmap)))
|
||||
stop("fmap: argument must be a character string (when provided)")
|
||||
|
||||
model <- xgb.Booster.complete(model)
|
||||
model_dump <- .Call(XGBoosterDumpModel_R, model$handle, NVL(fmap, "")[1], as.integer(with_stats),
|
||||
as.character(dump_format))
|
||||
model_dump <- .Call(
|
||||
XGBoosterDumpModel_R,
|
||||
xgb.get.handle(model),
|
||||
NVL(fmap, "")[1],
|
||||
as.integer(with_stats),
|
||||
as.character(dump_format)
|
||||
)
|
||||
if (dump_format == "dot") {
|
||||
return(sapply(model_dump, function(x) gsub("^booster\\[\\d+\\]\\n", "\\1", x)))
|
||||
}
|
||||
|
||||
@@ -119,21 +119,21 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
|
||||
if (!(is.null(data) && is.null(label) && is.null(target)))
|
||||
warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")
|
||||
|
||||
if (!inherits(model, "xgb.Booster"))
|
||||
stop("model: must be an object of class xgb.Booster")
|
||||
|
||||
if (is.null(feature_names) && !is.null(model$feature_names))
|
||||
feature_names <- model$feature_names
|
||||
if (is.null(feature_names)) {
|
||||
model_feature_names <- xgb.feature_names(model)
|
||||
if (NROW(model_feature_names)) {
|
||||
feature_names <- model_feature_names
|
||||
}
|
||||
}
|
||||
|
||||
if (!(is.null(feature_names) || is.character(feature_names)))
|
||||
stop("feature_names: Has to be a character vector")
|
||||
|
||||
model <- xgb.Booster.complete(model)
|
||||
config <- jsonlite::fromJSON(xgb.config(model))
|
||||
if (config$learner$gradient_booster$name == "gblinear") {
|
||||
handle <- xgb.get.handle(model)
|
||||
if (xgb.booster_type(model) == "gblinear") {
|
||||
args <- list(importance_type = "weight", feature_names = feature_names)
|
||||
results <- .Call(
|
||||
XGBoosterFeatureScore_R, model$handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
|
||||
XGBoosterFeatureScore_R, handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
|
||||
)
|
||||
names(results) <- c("features", "shape", "weight")
|
||||
if (length(results$shape) == 2) {
|
||||
@@ -154,7 +154,7 @@ xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
|
||||
for (importance_type in c("weight", "total_gain", "total_cover")) {
|
||||
args <- list(importance_type = importance_type, feature_names = feature_names, tree_idx = trees)
|
||||
results <- .Call(
|
||||
XGBoosterFeatureScore_R, model$handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
|
||||
XGBoosterFeatureScore_R, handle, jsonlite::toJSON(args, auto_unbox = TRUE, null = "null")
|
||||
)
|
||||
names(results) <- c("features", "shape", importance_type)
|
||||
concatenated[
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
#' An object of \code{xgb.Booster} class.
|
||||
#'
|
||||
#' @seealso
|
||||
#' \code{\link{xgb.save}}, \code{\link{xgb.Booster.complete}}.
|
||||
#' \code{\link{xgb.save}}
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
@@ -46,25 +46,20 @@ xgb.load <- function(modelfile) {
|
||||
if (is.null(modelfile))
|
||||
stop("xgb.load: modelfile cannot be NULL")
|
||||
|
||||
handle <- xgb.Booster.handle(
|
||||
bst <- xgb.Booster(
|
||||
params = list(),
|
||||
cachelist = list(),
|
||||
modelfile = modelfile,
|
||||
handle = NULL
|
||||
modelfile = modelfile
|
||||
)
|
||||
bst <- bst$bst
|
||||
# re-use modelfile if it is raw so we do not need to serialize
|
||||
if (typeof(modelfile) == "raw") {
|
||||
warning(
|
||||
paste(
|
||||
"The support for loading raw booster with `xgb.load` will be ",
|
||||
"discontinued in upcoming release. Use `xgb.load.raw` or",
|
||||
" `xgb.unserialize` instead. "
|
||||
"discontinued in upcoming release. Use `xgb.load.raw` instead. "
|
||||
)
|
||||
)
|
||||
bst <- xgb.handleToBooster(handle = handle, raw = modelfile)
|
||||
} else {
|
||||
bst <- xgb.handleToBooster(handle = handle, raw = NULL)
|
||||
}
|
||||
bst <- xgb.Booster.complete(bst, saveraw = TRUE)
|
||||
return(bst)
|
||||
}
|
||||
|
||||
@@ -3,21 +3,10 @@
|
||||
#' User can generate raw memory buffer by calling xgb.save.raw
|
||||
#'
|
||||
#' @param buffer the buffer returned by xgb.save.raw
|
||||
#' @param as_booster Return the loaded model as xgb.Booster instead of xgb.Booster.handle.
|
||||
#'
|
||||
#' @export
|
||||
xgb.load.raw <- function(buffer, as_booster = FALSE) {
|
||||
xgb.load.raw <- function(buffer) {
|
||||
cachelist <- list()
|
||||
handle <- .Call(XGBoosterCreate_R, cachelist)
|
||||
.Call(XGBoosterLoadModelFromRaw_R, handle, buffer)
|
||||
class(handle) <- "xgb.Booster.handle"
|
||||
|
||||
if (as_booster) {
|
||||
booster <- list(handle = handle, raw = NULL)
|
||||
class(booster) <- "xgb.Booster"
|
||||
booster <- xgb.Booster.complete(booster, saveraw = TRUE)
|
||||
return(booster)
|
||||
} else {
|
||||
return(handle)
|
||||
}
|
||||
bst <- .Call(XGBoosterCreate_R, cachelist)
|
||||
.Call(XGBoosterLoadModelFromRaw_R, xgb.get.handle(bst), buffer)
|
||||
return(bst)
|
||||
}
|
||||
|
||||
@@ -2,8 +2,10 @@
|
||||
#'
|
||||
#' Parse a boosted tree model text dump into a `data.table` structure.
|
||||
#'
|
||||
#' @param feature_names Character vector used to overwrite the feature names
|
||||
#' of the model. The default (`NULL`) uses the original feature names.
|
||||
#' @param feature_names Character vector of feature names. If the model already
|
||||
#' contains feature names, those will be used when \code{feature_names=NULL} (default value).
|
||||
#'
|
||||
#' Note that, if the model already contains feature names, it's \bold{not} possible to override them here.
|
||||
#' @param model Object of class `xgb.Booster`.
|
||||
#' @param text Character vector previously generated by the function [xgb.dump()]
|
||||
#' (called with parameter `with_stats = TRUE`). `text` takes precedence over `model`.
|
||||
@@ -54,8 +56,6 @@
|
||||
#' objective = "binary:logistic"
|
||||
#' )
|
||||
#'
|
||||
#' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
|
||||
#'
|
||||
#' # This bst model already has feature_names stored with it, so those would be used when
|
||||
#' # feature_names is not set:
|
||||
#' (dt <- xgb.model.dt.tree(model = bst))
|
||||
@@ -79,8 +79,15 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
" (or NULL if 'model' was provided).")
|
||||
}
|
||||
|
||||
if (is.null(feature_names) && !is.null(model) && !is.null(model$feature_names))
|
||||
feature_names <- model$feature_names
|
||||
model_feature_names <- NULL
|
||||
if (inherits(model, "xgb.Booster")) {
|
||||
model_feature_names <- xgb.feature_names(model)
|
||||
if (NROW(model_feature_names) && !is.null(feature_names)) {
|
||||
stop("'model' contains feature names. Cannot override them.")
|
||||
}
|
||||
}
|
||||
if (is.null(feature_names) && !is.null(model) && !is.null(model_feature_names))
|
||||
feature_names <- model_feature_names
|
||||
|
||||
if (!(is.null(feature_names) || is.character(feature_names))) {
|
||||
stop("feature_names: must be a character vector")
|
||||
@@ -90,8 +97,10 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
stop("trees: must be a vector of integers.")
|
||||
}
|
||||
|
||||
from_text <- TRUE
|
||||
if (is.null(text)) {
|
||||
text <- xgb.dump(model = model, with_stats = TRUE)
|
||||
from_text <- FALSE
|
||||
}
|
||||
|
||||
if (length(text) < 2 || !any(grepl('leaf=(\\d+)', text))) {
|
||||
@@ -120,8 +129,28 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
td[, isLeaf := grepl("leaf", t, fixed = TRUE)]
|
||||
|
||||
# parse branch lines
|
||||
branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
|
||||
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
||||
branch_rx_nonames <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
|
||||
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
||||
branch_rx_w_names <- paste0("\\d+:\\[(.+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
|
||||
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
||||
text_has_feature_names <- FALSE
|
||||
if (NROW(model_feature_names)) {
|
||||
branch_rx <- branch_rx_w_names
|
||||
text_has_feature_names <- TRUE
|
||||
} else {
|
||||
# Note: when passing a text dump, it might or might not have feature names,
|
||||
# but that aspect is unknown from just the text attributes
|
||||
branch_rx <- branch_rx_nonames
|
||||
if (from_text) {
|
||||
if (sum(grepl(branch_rx_w_names, text)) > sum(grepl(branch_rx_nonames, text))) {
|
||||
branch_rx <- branch_rx_w_names
|
||||
text_has_feature_names <- TRUE
|
||||
}
|
||||
}
|
||||
}
|
||||
if (text_has_feature_names && is.null(model) && !is.null(feature_names)) {
|
||||
stop("'text' contains feature names. Cannot override them.")
|
||||
}
|
||||
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
|
||||
td[
|
||||
isLeaf == FALSE,
|
||||
@@ -144,10 +173,12 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
is_stump <- function() {
|
||||
return(length(td$Feature) == 1 && is.na(td$Feature))
|
||||
}
|
||||
if (!is.null(feature_names) && !is_stump()) {
|
||||
if (length(feature_names) <= max(as.numeric(td$Feature), na.rm = TRUE))
|
||||
stop("feature_names has less elements than there are features used in the model")
|
||||
td[isLeaf == FALSE, Feature := feature_names[as.numeric(Feature) + 1]]
|
||||
if (!text_has_feature_names) {
|
||||
if (!is.null(feature_names) && !is_stump()) {
|
||||
if (length(feature_names) <= max(as.numeric(td$Feature), na.rm = TRUE))
|
||||
stop("feature_names has less elements than there are features used in the model")
|
||||
td[isLeaf == FALSE, Feature := feature_names[as.numeric(Feature) + 1]]
|
||||
}
|
||||
}
|
||||
|
||||
# parse leaf lines
|
||||
|
||||
@@ -303,7 +303,11 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
|
||||
if (is.character(features) && is.null(colnames(data)))
|
||||
stop("either provide `data` with column names or provide `features` as column indices")
|
||||
|
||||
if (is.null(model$feature_names) && model$nfeatures != ncol(data))
|
||||
model_feature_names <- NULL
|
||||
if (is.null(features) && !is.null(model)) {
|
||||
model_feature_names <- xgb.feature_names(model)
|
||||
}
|
||||
if (is.null(model_feature_names) && xgb.num_feature(model) != ncol(data))
|
||||
stop("if model has no feature_names, columns in `data` must match features in model")
|
||||
|
||||
if (!is.null(subsample)) {
|
||||
@@ -332,7 +336,7 @@ xgb.shap.data <- function(data, shap_contrib = NULL, features = NULL, top_n = 1,
|
||||
}
|
||||
|
||||
if (is.null(features)) {
|
||||
if (!is.null(model$feature_names)) {
|
||||
if (!is.null(model_feature_names)) {
|
||||
imp <- xgb.importance(model = model, trees = trees)
|
||||
} else {
|
||||
imp <- xgb.importance(model = model, trees = trees, feature_names = colnames(data))
|
||||
|
||||
@@ -1,12 +1,24 @@
|
||||
#' Save xgboost model to binary file
|
||||
#'
|
||||
#' Save xgboost model to a file in binary format.
|
||||
#' Save xgboost model to a file in binary or JSON format.
|
||||
#'
|
||||
#' @param model model object of \code{xgb.Booster} class.
|
||||
#' @param fname name of the file to write.
|
||||
#' @param model Model object of \code{xgb.Booster} class.
|
||||
#' @param fname Name of the file to write.
|
||||
#'
|
||||
#' Note that the extension of this file name determined the serialization format to use:\itemize{
|
||||
#' \item Extension ".ubj" will use the universal binary JSON format (recommended).
|
||||
#' This format uses binary types for e.g. floating point numbers, thereby preventing any loss
|
||||
#' of precision when converting to a human-readable JSON text or similar.
|
||||
#' \item Extension ".json" will use plain JSON, which is a human-readable format.
|
||||
#' \item Extension ".deprecated" will use a \bold{deprecated} binary format. This format will
|
||||
#' not be able to save attributes introduced after v1 of XGBoost, such as the "best_iteration"
|
||||
#' attribute that boosters might keep, nor feature names or user-specifiec attributes.
|
||||
#' \item If the format is not specified by passing one of the file extensions above, will
|
||||
#' default to UBJ.
|
||||
#' }
|
||||
#'
|
||||
#' @details
|
||||
#' This methods allows to save a model in an xgboost-internal binary format which is universal
|
||||
#' This methods allows to save a model in an xgboost-internal binary or text format which is universal
|
||||
#' among the various xgboost interfaces. In R, the saved model file could be read-in later
|
||||
#' using either the \code{\link{xgb.load}} function or the \code{xgb_model} parameter
|
||||
#' of \code{\link{xgb.train}}.
|
||||
@@ -14,13 +26,13 @@
|
||||
#' Note: a model can also be saved as an R-object (e.g., by using \code{\link[base]{readRDS}}
|
||||
#' or \code{\link[base]{save}}). However, it would then only be compatible with R, and
|
||||
#' corresponding R-methods would need to be used to load it. Moreover, persisting the model with
|
||||
#' \code{\link[base]{readRDS}} or \code{\link[base]{save}}) will cause compatibility problems in
|
||||
#' \code{\link[base]{readRDS}} or \code{\link[base]{save}}) might cause compatibility problems in
|
||||
#' future versions of XGBoost. Consult \code{\link{a-compatibility-note-for-saveRDS-save}} to learn
|
||||
#' how to persist models in a future-proof way, i.e. to make the model accessible in future
|
||||
#' releases of XGBoost.
|
||||
#'
|
||||
#' @seealso
|
||||
#' \code{\link{xgb.load}}, \code{\link{xgb.Booster.complete}}.
|
||||
#' \code{\link{xgb.load}}
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
@@ -51,8 +63,7 @@ xgb.save <- function(model, fname) {
|
||||
stop("model must be xgb.Booster.",
|
||||
if (inherits(model, "xgb.DMatrix")) " Use xgb.DMatrix.save to save an xgb.DMatrix object." else "")
|
||||
}
|
||||
model <- xgb.Booster.complete(model, saveraw = FALSE)
|
||||
fname <- path.expand(fname)
|
||||
.Call(XGBoosterSaveModel_R, model$handle, enc2utf8(fname[1]))
|
||||
.Call(XGBoosterSaveModel_R, xgb.get.handle(model), enc2utf8(fname[1]))
|
||||
return(TRUE)
|
||||
}
|
||||
|
||||
@@ -11,8 +11,6 @@
|
||||
#' \item \code{deprecated}: Encode the booster into old customized binary format.
|
||||
#' }
|
||||
#'
|
||||
#' Right now the default is \code{deprecated} but will be changed to \code{ubj} in upcoming release.
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
@@ -30,7 +28,7 @@
|
||||
#' bst <- xgb.load.raw(raw)
|
||||
#'
|
||||
#' @export
|
||||
xgb.save.raw <- function(model, raw_format = "deprecated") {
|
||||
xgb.save.raw <- function(model, raw_format = "ubj") {
|
||||
handle <- xgb.get.handle(model)
|
||||
args <- list(format = raw_format)
|
||||
.Call(XGBoosterSaveModelToRaw_R, handle, jsonlite::toJSON(args, auto_unbox = TRUE))
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
#' Serialize the booster instance into R's raw vector. The serialization method differs
|
||||
#' from \code{\link{xgb.save.raw}} as the latter one saves only the model but not
|
||||
#' parameters. This serialization format is not stable across different xgboost versions.
|
||||
#'
|
||||
#' @param booster the booster instance
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' test <- agaricus.test
|
||||
#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
|
||||
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||
#' raw <- xgb.serialize(bst)
|
||||
#' bst <- xgb.unserialize(raw)
|
||||
#'
|
||||
#' @export
|
||||
xgb.serialize <- function(booster) {
|
||||
handle <- xgb.get.handle(booster)
|
||||
.Call(XGBoosterSerializeToBuffer_R, handle)
|
||||
}
|
||||
@@ -152,6 +152,10 @@
|
||||
#' See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
|
||||
#' parameters' values. User can provide either existing or their own callback methods in order
|
||||
#' to customize the training process.
|
||||
#'
|
||||
#' Note that some callbacks might try to set an evaluation log - be aware that these evaluation logs
|
||||
#' are kept as R attributes, and thus do not get saved when using non-R serializaters like
|
||||
#' \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
|
||||
#' @param ... other parameters to pass to \code{params}.
|
||||
#' @param label vector of response values. Should not be provided when data is
|
||||
#' a local data file name or an \code{xgb.DMatrix}.
|
||||
@@ -160,6 +164,9 @@
|
||||
#' This parameter is only used when input is a dense matrix.
|
||||
#' @param weight a vector indicating the weight for each row of the input.
|
||||
#'
|
||||
#' @return
|
||||
#' An object of class \code{xgb.Booster}.
|
||||
#'
|
||||
#' @details
|
||||
#' These are the training functions for \code{xgboost}.
|
||||
#'
|
||||
@@ -201,28 +208,20 @@
|
||||
#' \item \code{cb.save.model}: when \code{save_period > 0} is set.
|
||||
#' }
|
||||
#'
|
||||
#' @return
|
||||
#' An object of class \code{xgb.Booster} with the following elements:
|
||||
#' \itemize{
|
||||
#' \item \code{handle} a handle (pointer) to the xgboost model in memory.
|
||||
#' \item \code{raw} a cached memory dump of the xgboost model saved as R's \code{raw} type.
|
||||
#' \item \code{niter} number of boosting iterations.
|
||||
#' \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
|
||||
#' first column corresponding to iteration number and the rest corresponding to evaluation
|
||||
#' metrics' values. It is created by the \code{\link{cb.evaluation.log}} callback.
|
||||
#' \item \code{call} a function call.
|
||||
#' \item \code{params} parameters that were passed to the xgboost library. Note that it does not
|
||||
#' capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
|
||||
#' \item \code{callbacks} callback functions that were either automatically assigned or
|
||||
#' explicitly passed.
|
||||
#' \item \code{best_iteration} iteration number with the best evaluation metric value
|
||||
#' (only available with early stopping).
|
||||
#' \item \code{best_score} the best evaluation metric value during early stopping.
|
||||
#' (only available with early stopping).
|
||||
#' \item \code{feature_names} names of the training dataset features
|
||||
#' (only when column names were defined in training data).
|
||||
#' \item \code{nfeatures} number of features in training data.
|
||||
#' }
|
||||
#' Note that objects of type `xgb.Booster` as returned by this function behave a bit differently
|
||||
#' from typical R objects (it's an 'altrep' list class), and it makes a separation between
|
||||
#' internal booster attributes (restricted to jsonifyable data), accessed through \link{xgb.attr}
|
||||
#' and shared between interfaces through serialization functions like \link{xgb.save}; and
|
||||
#' R-specific attributes, accessed through \link{attributes} and \link{attr}, which are otherwise
|
||||
#' only used in the R interface, only kept when using R's serializers like \link{saveRDS}, and
|
||||
#' not anyhow used by functions like \link{predict.xgb.Booster}.
|
||||
#'
|
||||
#' Be aware that one such R attribute that is automatically added is `params` - this attribute
|
||||
#' is assigned from the `params` argument to this function, and is only meant to serve as a
|
||||
#' reference for what went into the booster, but is not used in other methods that take a booster
|
||||
#' object - so for example, changing the booster's configuration requires calling `xgb.config<-`
|
||||
#' or 'xgb.parameters<-', while simply modifying `attributes(model)$params$<...>` will have no
|
||||
#' effect elsewhere.
|
||||
#'
|
||||
#' @seealso
|
||||
#' \code{\link{callbacks}},
|
||||
@@ -371,27 +370,31 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
||||
# The tree updating process would need slightly different handling
|
||||
is_update <- NVL(params[['process_type']], '.') == 'update'
|
||||
|
||||
past_evaluation_log <- NULL
|
||||
if (inherits(xgb_model, "xgb.Booster")) {
|
||||
past_evaluation_log <- attributes(xgb_model)$evaluation_log
|
||||
}
|
||||
|
||||
# Construct a booster (either a new one or load from xgb_model)
|
||||
handle <- xgb.Booster.handle(
|
||||
bst <- xgb.Booster(
|
||||
params = params,
|
||||
cachelist = append(watchlist, dtrain),
|
||||
modelfile = xgb_model,
|
||||
handle = NULL
|
||||
modelfile = xgb_model
|
||||
)
|
||||
niter_init <- bst$niter
|
||||
bst <- bst$bst
|
||||
.Call(
|
||||
XGBoosterCopyInfoFromDMatrix_R,
|
||||
xgb.get.handle(bst),
|
||||
dtrain
|
||||
)
|
||||
bst <- xgb.handleToBooster(handle = handle, raw = NULL)
|
||||
|
||||
# extract parameters that can affect the relationship b/w #trees and #iterations
|
||||
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1)
|
||||
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1)
|
||||
# Note: it might look like these aren't used, but they need to be defined in this
|
||||
# environment for the callbacks for work correctly.
|
||||
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
|
||||
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
|
||||
|
||||
# When the 'xgb_model' was set, find out how many boosting iterations it has
|
||||
niter_init <- 0
|
||||
if (!is.null(xgb_model)) {
|
||||
niter_init <- as.numeric(xgb.attr(bst, 'niter')) + 1
|
||||
if (length(niter_init) == 0) {
|
||||
niter_init <- xgb.ntree(bst) %/% (num_parallel_tree * num_class)
|
||||
}
|
||||
}
|
||||
if (is_update && nrounds > niter_init)
|
||||
stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
|
||||
|
||||
@@ -405,7 +408,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
||||
for (f in cb$pre_iter) f()
|
||||
|
||||
xgb.iter.update(
|
||||
booster_handle = bst$handle,
|
||||
bst = bst,
|
||||
dtrain = dtrain,
|
||||
iter = iteration - 1,
|
||||
obj = obj
|
||||
@@ -413,46 +416,43 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
||||
|
||||
if (length(watchlist) > 0) {
|
||||
bst_evaluation <- xgb.iter.eval( # nolint: object_usage_linter
|
||||
booster_handle = bst$handle,
|
||||
bst = bst,
|
||||
watchlist = watchlist,
|
||||
iter = iteration - 1,
|
||||
feval = feval
|
||||
)
|
||||
}
|
||||
|
||||
xgb.attr(bst$handle, 'niter') <- iteration - 1
|
||||
|
||||
for (f in cb$post_iter) f()
|
||||
|
||||
if (stop_condition) break
|
||||
}
|
||||
for (f in cb$finalize) f(finalize = TRUE)
|
||||
|
||||
bst <- xgb.Booster.complete(bst, saveraw = TRUE)
|
||||
|
||||
# store the total number of boosting iterations
|
||||
bst$niter <- end_iteration
|
||||
|
||||
# store the evaluation results
|
||||
if (length(evaluation_log) > 0 &&
|
||||
nrow(evaluation_log) > 0) {
|
||||
keep_evaluation_log <- FALSE
|
||||
if (length(evaluation_log) > 0 && nrow(evaluation_log) > 0) {
|
||||
keep_evaluation_log <- TRUE
|
||||
# include the previous compatible history when available
|
||||
if (inherits(xgb_model, 'xgb.Booster') &&
|
||||
!is_update &&
|
||||
!is.null(xgb_model$evaluation_log) &&
|
||||
!is.null(past_evaluation_log) &&
|
||||
isTRUE(all.equal(colnames(evaluation_log),
|
||||
colnames(xgb_model$evaluation_log)))) {
|
||||
evaluation_log <- rbindlist(list(xgb_model$evaluation_log, evaluation_log))
|
||||
colnames(past_evaluation_log)))) {
|
||||
evaluation_log <- rbindlist(list(past_evaluation_log, evaluation_log))
|
||||
}
|
||||
bst$evaluation_log <- evaluation_log
|
||||
}
|
||||
|
||||
bst$call <- match.call()
|
||||
bst$params <- params
|
||||
bst$callbacks <- callbacks
|
||||
if (!is.null(colnames(dtrain)))
|
||||
bst$feature_names <- colnames(dtrain)
|
||||
bst$nfeatures <- ncol(dtrain)
|
||||
extra_attrs <- list(
|
||||
call = match.call(),
|
||||
params = params,
|
||||
callbacks = callbacks
|
||||
)
|
||||
if (keep_evaluation_log) {
|
||||
extra_attrs$evaluation_log <- evaluation_log
|
||||
}
|
||||
curr_attrs <- attributes(bst)
|
||||
attributes(bst) <- c(curr_attrs, extra_attrs)
|
||||
|
||||
return(bst)
|
||||
}
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
#' Load the instance back from \code{\link{xgb.serialize}}
|
||||
#'
|
||||
#' @param buffer the buffer containing booster instance saved by \code{\link{xgb.serialize}}
|
||||
#' @param handle An \code{xgb.Booster.handle} object which will be overwritten with
|
||||
#' the new deserialized object. Must be a null handle (e.g. when loading the model through
|
||||
#' `readRDS`). If not provided, a new handle will be created.
|
||||
#' @return An \code{xgb.Booster.handle} object.
|
||||
#'
|
||||
#' @export
|
||||
xgb.unserialize <- function(buffer, handle = NULL) {
|
||||
cachelist <- list()
|
||||
if (is.null(handle)) {
|
||||
handle <- .Call(XGBoosterCreate_R, cachelist)
|
||||
} else {
|
||||
if (!is.null.handle(handle))
|
||||
stop("'handle' is not null/empty. Cannot overwrite existing handle.")
|
||||
.Call(XGBoosterCreateInEmptyObj_R, cachelist, handle)
|
||||
}
|
||||
tryCatch(
|
||||
.Call(XGBoosterUnserializeFromBuffer_R, handle, buffer),
|
||||
error = function(e) {
|
||||
error_msg <- conditionMessage(e)
|
||||
m <- regexec("(src[\\\\/]learner.cc:[0-9]+): Check failed: (header == serialisation_header_)",
|
||||
error_msg, perl = TRUE)
|
||||
groups <- regmatches(error_msg, m)[[1]]
|
||||
if (length(groups) == 3) {
|
||||
warning(paste("The model had been generated by XGBoost version 1.0.0 or earlier and was ",
|
||||
"loaded from a RDS file. We strongly ADVISE AGAINST using saveRDS() ",
|
||||
"function, to ensure that your model can be read in current and upcoming ",
|
||||
"XGBoost releases. Please use xgb.save() instead to preserve models for the ",
|
||||
"long term. For more details and explanation, see ",
|
||||
"https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html",
|
||||
sep = ""))
|
||||
.Call(XGBoosterLoadModelFromRaw_R, handle, buffer)
|
||||
} else {
|
||||
stop(e)
|
||||
}
|
||||
})
|
||||
class(handle) <- "xgb.Booster.handle"
|
||||
return(handle)
|
||||
}
|
||||
@@ -100,8 +100,10 @@ NULL
|
||||
#' @importFrom jsonlite toJSON
|
||||
#' @importFrom methods new
|
||||
#' @importFrom utils object.size str tail
|
||||
#' @importFrom stats coef
|
||||
#' @importFrom stats predict
|
||||
#' @importFrom stats median
|
||||
#' @importFrom stats variable.names
|
||||
#' @importFrom utils head
|
||||
#' @importFrom graphics barplot
|
||||
#' @importFrom graphics lines
|
||||
|
||||
Reference in New Issue
Block a user