.reserved_cb_names <- c("names", "class", "call", "params", "niter", "nfeatures", "folds")

#' @title XGBoost Callback Constructor
#' @description Constructor for defining the structure of callback functions that can be executed
#' at different stages of model training (before / after training, before / after each boosting
#' iteration).
#' @param cb_name Name for the callback.
#'
#' If the callback produces some non-NULL result (from executing the function passed under
#' `f_after_training`), that result will be added as an R attribute to the resulting booster
#' (or as a named element in the result of CV), with the attribute name specified here.
#'
#' Names of callbacks must be unique - i.e. there cannot be two callbacks with the same name.
#' @param env An environment object that will be passed to the different functions in the callback.
#' Note that this environment will not be shared with other callbacks.
#' @param f_before_training A function that will be executed before the training has started.
#'
#' If passing `NULL` for this or for the other function inputs, then no function will be executed.
#'
#' If passing a function, it will be called with parameters supplied as non-named arguments
#' matching the function signatures that are shown in the default value for each function argument.
#' @param f_before_iter A function that will be executed before each boosting round.
#'
#' This function can signal whether the training should be finalized or not, by outputting
#' a value that evaluates to `TRUE` - i.e. if the output from the function provided here at
#' a given round is `TRUE`, then training will be stopped before the current iteration happens.
#'
#' Return values of `NULL` will be interpreted as `FALSE`.
#' @param f_after_iter A function that will be executed after each boosting round.
#'
#' This function can signal whether the training should be finalized or not, by outputting
#' a value that evaluates to `TRUE` - i.e. if the output from the function provided here at
#' a given round is `TRUE`, then training will be stopped at that round.
#'
#' Return values of `NULL` will be interpreted as `FALSE`.
#' @param f_after_training A function that will be executed after training is finished.
#'
#' This function can optionally output something non-NULL, which will become part of the R
#' attributes of the booster (assuming one passes `keep_extra_attributes=TRUE` to \link{xgb.train})
#' under the name supplied for parameter `cb_name` in the case of \link{xgb.train}; or a part
#' of the named elements in the result of \link{xgb.cv}.
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' @details Arguments that will be passed to the supplied functions are as follows:\itemize{
#'
#' \item env The same environment that is passed under argument `env`.
#'
#' It may be modified by the functions in order to e.g. keep track of what happens
#' across iterations or similar.
#'
#' This environment is only used by the functions supplied to the callback, and will
#' not be kept after the model fitting function terminates (see parameter `f_after_training`).
#'
#' \item model The booster object when using \link{xgb.train}, or the folds when using
#' \link{xgb.cv}.
#'
#' For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
#' \item `dtrain`: The training data for the fold (as an `xgb.DMatrix` object).
#' \item `bst`: The `xgb.Booster` object for the fold.
#' \item `evals`: A list containing two DMatrices, with names `train` and `test`
#' (`test` is the held-out data for the fold).
#' \item `index`: The indices of the hold-out data for that fold (base-1 indexing),
#' from which the `test` entry in `evals` was obtained.
#' }
#'
#' This object should \bold{not} be in-place modified in ways that conflict with the
#' training (e.g. resetting the parameters for a training update in a way that resets
#' the number of rounds to zero in order to overwrite rounds).
#'
#' Note that any R attributes that are assigned to the booster during the callback functions
#' will not be kept thereafter, as the booster object variable is not re-assigned during
#' training. It is however possible to set C-level attributes of the booster through
#' \link{xgb.attr} or \link{xgb.attributes}, which should remain available for the rest
#' of the iterations and after the training is done.
#'
#' For keeping variables across iterations, it's recommended to use `env` instead.
#' \item data The data to which the model is being fit, as an `xgb.DMatrix` object.
#'
#' Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
#' folds can be found in the `model` object.
#'
#' \item evals The evaluation data, as passed under argument `evals` to
#' \link{xgb.train}.
#'
#' For \link{xgb.cv}, this will always be `NULL`.
#'
#' \item begin_iteration Index of the first boosting iteration that will be executed
#' (base-1 indexing).
#'
#' This will typically be '1', but when using training continuation, depending on the
#' parameters for updates, boosting rounds will be continued from where the previous
#' model ended, in which case this will be larger than 1.
#'
#' \item end_iteration Index of the last boosting iteration that will be executed
#' (base-1 indexing, inclusive of this end).
#'
#' It should match with argument `nrounds` passed to \link{xgb.train} or \link{xgb.cv}.
#'
#' Note that boosting might be interrupted before reaching this last iteration, for
#' example by using the early stopping callback \link{xgb.cb.early.stop}.
#'
#' \item iteration Index of the iteration number that is being executed (first iteration
#' will be the same as parameter `begin_iteration`, then next one will add +1, and so on).
#'
#' \item iter_feval Evaluation metrics for `evals` that were supplied, either
#' determined by the objective, or by parameter `feval`.
#'
#' For \link{xgb.train}, this will be a named vector with one entry per element in
#' `evals`, where the names are determined as 'evals name' + '-' + 'metric name' - for
#' example, if `evals` contains an entry named "tr" and the metric is "rmse",
#' this will be a one-element vector with name "tr-rmse".
#'
#' For \link{xgb.cv}, this will be a 2d matrix with dimensions `[length(evals), nfolds]`,
#' where the row names will follow the same naming logic as the one-dimensional vector
#' that is passed in \link{xgb.train}.
#'
#' Note that, internally, the built-in callbacks such as \link{xgb.cb.print.evaluation} summarize
#' this table by calculating the row-wise means and standard deviations.
#'
#' \item final_feval The evaluation results after the last boosting round is executed
#' (same format as `iter_feval`, and will be the exact same input as passed under
#' `iter_feval` to the last round that is executed during model fitting).
#'
#' \item prev_cb_res Result from a previous run of a callback sharing the same name
#' (as given by parameter `cb_name`) when conducting training continuation, if there
#' was any in the booster R attributes.
#'
#' Sometimes, one might want to append the new results to the previous one, and this will
#' be done automatically by the built-in callbacks such as \link{xgb.cb.evaluation.log},
#' which will append the new rows to the previous table.
#'
#' If no such previous callback result is available (which it never will be when fitting
#' a model from start instead of updating an existing model), this will be `NULL`.
#'
#' For \link{xgb.cv}, which doesn't support training continuation, this will always be `NULL`.
#' }
#'
#' The following names (`cb_name` values) are reserved for internal callbacks:\itemize{
#' \item print_evaluation
#' \item evaluation_log
#' \item reset_parameters
#' \item early_stop
#' \item save_model
#' \item cv_predict
#' \item gblinear_history
#' }
#'
#' The following names are reserved for other non-callback attributes:\itemize{
#' \item names
#' \item class
#' \item call
#' \item params
#' \item niter
#' \item nfeatures
#' \item folds
#' }
#'
#' When using the built-in early stopping callback (\link{xgb.cb.early.stop}), said callback
#' will always be executed before the others, as it sets some booster C-level attributes
#' that other callbacks might also use. Otherwise, the order of execution will match with
#' the order in which the callbacks are passed to the model fitting function.
#' @seealso Built-in callbacks:\itemize{
#' \item \link{xgb.cb.print.evaluation}
#' \item \link{xgb.cb.evaluation.log}
#' \item \link{xgb.cb.reset.parameters}
#' \item \link{xgb.cb.early.stop}
#' \item \link{xgb.cb.save.model}
#' \item \link{xgb.cb.cv.predict}
#' \item \link{xgb.cb.gblinear.history}
#' }
#' @examples
#' # Example constructing a custom callback that calculates
#' # squared error on the training data (no separate test set),
#' # and outputs the per-iteration results.
#' ssq_callback <- xgb.Callback(
#'   cb_name = "ssq",
#'   f_before_training = function(env, model, data, evals,
#'                                begin_iteration, end_iteration) {
#'     # A vector to keep track of a number at each iteration
#'     env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
#'   },
#'   f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
#'     # This calculates the sum of squared errors on the training data.
#'     # Note that this can be better done by passing an 'evals' entry,
#'     # but this demonstrates a way in which callbacks can be structured.
#'     pred <- predict(model, data)
#'     err <- pred - getinfo(data, "label")
#'     sq_err <- sum(err^2)
#'     env$logs[iteration] <- sq_err
#'     cat(
#'       sprintf(
#'         "Squared error at iteration %d: %.2f\n",
#'         iteration, sq_err
#'       )
#'     )
#'
#'     # A return value of 'TRUE' here would signal to finalize the training
#'     return(FALSE)
#'   },
#'   f_after_training = function(env, model, data, evals, iteration,
#'                               final_feval, prev_cb_res) {
#'     return(env$logs)
#'   }
#' )
#'
#' data(mtcars)
#' y <- mtcars$mpg
#' x <- as.matrix(mtcars[, -1])
#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
#' model <- xgb.train(
#'   data = dm,
#'   params = list(objective = "reg:squarederror", nthread = 1),
#'   nrounds = 5,
#'   callbacks = list(ssq_callback),
#'   keep_extra_attributes = TRUE
#' )
#'
#' # Result from 'f_after_iter' will be available as an attribute
#' attributes(model)$ssq
#' @export
xgb.Callback <- function(
  cb_name = "custom_callback",
  env = new.env(),
  f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) NULL,
  f_before_iter = function(env, model, data, evals, iteration) NULL,
  f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
  f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) NULL
) {
  stopifnot(is.null(f_before_training) || is.function(f_before_training))
  stopifnot(is.null(f_before_iter) || is.function(f_before_iter))
  stopifnot(is.null(f_after_iter) || is.function(f_after_iter))
  stopifnot(is.null(f_after_training) || is.function(f_after_training))
  stopifnot(is.character(cb_name) && length(cb_name) == 1)

  if (cb_name %in% .reserved_cb_names) {
    stop("Cannot use reserved callback name '", cb_name, "'.")
  }

  out <- list(
    cb_name = cb_name,
    env = env,
    f_before_training = f_before_training,
    f_before_iter = f_before_iter,
    f_after_iter = f_after_iter,
    f_after_training = f_after_training
  )
  class(out) <- "xgb.Callback"
  return(out)
}

.execute.cb.before.training <- function(
  callbacks,
  model,
  data,
  evals,
  begin_iteration,
  end_iteration
) {
  for (callback in callbacks) {
    if (!is.null(callback$f_before_training)) {
      callback$f_before_training(
        callback$env,
        model,
        data,
        evals,
        begin_iteration,
        end_iteration
      )
    }
  }
}

.execute.cb.before.iter <- function(
  callbacks,
  model,
  data,
  evals,
  iteration
) {
  if (!length(callbacks)) {
    return(FALSE)
  }
  out <- sapply(callbacks, function(cb) {
    if (is.null(cb$f_before_iter)) {
      return(FALSE)
    }
    should_stop <- cb$f_before_iter(
      cb$env,
      model,
      data,
      evals,
      iteration
    )
    if (!NROW(should_stop)) {
      should_stop <- FALSE
    } else if (NROW(should_stop) > 1) {
      should_stop <- head(as.logical(should_stop), 1)
    }
    return(should_stop)
  })
  return(any(out))
}

.execute.cb.after.iter <- function(
  callbacks,
  model,
  data,
  evals,
  iteration,
  iter_feval
) {
  if (!length(callbacks)) {
    return(FALSE)
  }
  out <- sapply(callbacks, function(cb) {
    if (is.null(cb$f_after_iter)) {
      return(FALSE)
    }
    should_stop <- cb$f_after_iter(
      cb$env,
      model,
      data,
      evals,
      iteration,
      iter_feval
    )
    if (!NROW(should_stop)) {
      should_stop <- FALSE
    } else if (NROW(should_stop) > 1) {
      should_stop <- head(as.logical(should_stop), 1)
    }
    return(should_stop)
  })
  return(any(out))
}

.execute.cb.after.training <- function(
  callbacks,
  model,
  data,
  evals,
  iteration,
  final_feval,
  prev_cb_res
) {
  if (!length(callbacks)) {
    return(NULL)
  }
  old_cb_res <- attributes(model)
  out <- lapply(callbacks, function(cb) {
    if (is.null(cb$f_after_training)) {
      return(NULL)
    } else {
      return(
        cb$f_after_training(
          cb$env,
          model,
          data,
          evals,
          iteration,
          final_feval,
          getElement(old_cb_res, cb$cb_name)
        )
      )
    }
  })
  names(out) <- sapply(callbacks, function(cb) cb$cb_name)
  if (NROW(out)) {
    out <- out[!sapply(out, is.null)]
  }
  return(out)
}

.summarize.feval <- function(iter_feval, showsd) {
  if (NCOL(iter_feval) > 1L && showsd) {
    stdev <- apply(iter_feval, 1, sd)
  } else {
    stdev <- NULL
  }
  if (NCOL(iter_feval) > 1L) {
    iter_feval <- rowMeans(iter_feval)
  }
  return(list(feval = iter_feval, stdev = stdev))
}
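
# Illustration (hypothetical values): for a 2-metric, 3-fold matrix such as
#   m <- matrix(seq(0.1, 0.6, by = 0.1), nrow = 2,
#               dimnames = list(c("train-rmse", "test-rmse"), NULL))
# .summarize.feval(m, TRUE) returns list(feval = rowMeans(m),
# stdev = apply(m, 1, sd)): one mean and standard deviation per metric.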

.print.evaluation <- function(iter_feval, showsd, iteration) {
  tmp <- .summarize.feval(iter_feval, showsd)
  msg <- .format_eval_string(iteration, tmp$feval, tmp$stdev)
  cat(msg, '\n')
}

# Format the evaluation metric string
.format_eval_string <- function(iter, eval_res, eval_err = NULL) {
  if (length(eval_res) == 0)
    stop('no evaluation results')
  enames <- names(eval_res)
  if (is.null(enames))
    stop('evaluation results must have names')
  iter <- sprintf('[%d]\t', iter)
  if (!is.null(eval_err)) {
    if (length(eval_res) != length(eval_err))
      stop('eval_res & eval_err lengths mismatch')
    # Note: UTF-8 code for plus/minus sign is U+00B1
    res <- paste0(sprintf("%s:%f\U00B1%f", enames, eval_res, eval_err), collapse = '\t')
  } else {
    res <- paste0(sprintf("%s:%f", enames, eval_res), collapse = '\t')
  }
  return(paste0(iter, res))
}
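
# Illustration (hypothetical values):
#   .format_eval_string(3, c("tr-rmse" = 0.25))
# returns "[3]\ttr-rmse:0.250000"; when 'eval_err' is supplied, each metric
# is printed as mean±err instead.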

#' @title Callback for printing the result of evaluation
#' @param period Results will be printed every \code{period} iterations.
#' @param showsd Whether standard deviations should be printed (when available).
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' @description
#' The callback function prints the result of evaluation every \code{period} iterations.
#' The initial and the last iteration's evaluations are always printed.
#'
#' Does not leave any attribute in the booster (see \link{xgb.cb.evaluation.log} for that).
#' @seealso \link{xgb.Callback}
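#' @examples
#' # A minimal sketch (illustrative data and parameters): print the
#' # evaluation metrics only every 10 rounds.
#' data(mtcars)
#' dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
#' model <- xgb.train(
#'   data = dm,
#'   params = list(objective = "reg:squarederror", nthread = 1),
#'   nrounds = 20,
#'   evals = list(tr = dm),
#'   verbose = 0,
#'   callbacks = list(xgb.cb.print.evaluation(period = 10))
#' )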
#' @export
xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
  if (length(period) != 1 || period != floor(period) || period < 1) {
    stop("'period' must be a positive integer.")
  }

  xgb.Callback(
    cb_name = "print_evaluation",
    env = as.environment(list(period = period, showsd = showsd, is_first_call = TRUE)),
    f_before_training = NULL,
    f_before_iter = NULL,
    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
      if (is.null(iter_feval)) {
        return(FALSE)
      }
      if (env$is_first_call || (iteration - 1) %% env$period == 0) {
        .print.evaluation(iter_feval, env$showsd, iteration)
        env$last_printed_iter <- iteration
      }
      env$is_first_call <- FALSE
      return(FALSE)
    },
    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
      if (is.null(final_feval)) {
        return(NULL)
      }
      if (is.null(env$last_printed_iter) || iteration > env$last_printed_iter) {
        .print.evaluation(final_feval, env$showsd, iteration)
      }
    }
  )
}

#' @title Callback for logging the evaluation history
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' @details This callback creates a table with per-iteration evaluation metrics (see parameters
#' `evals` and `feval` in \link{xgb.train}).
#'
#' Note: in the column names of the final data.table, the dash '-' character is replaced with
#' the underscore '_' in order to make the column names more like regular R identifiers.
#' @seealso \link{xgb.cb.print.evaluation}
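#' @examples
#' # A minimal sketch (illustrative data and parameters): keep the
#' # per-iteration metrics and retrieve them after training.
#' data(mtcars)
#' dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
#' model <- xgb.train(
#'   data = dm,
#'   params = list(objective = "reg:squarederror", nthread = 1),
#'   nrounds = 5,
#'   evals = list(tr = dm),
#'   verbose = 0,
#'   callbacks = list(xgb.cb.evaluation.log()),
#'   keep_extra_attributes = TRUE
#' )
#' attributes(model)$evaluation_log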
#' @export
xgb.cb.evaluation.log <- function() {
  xgb.Callback(
    cb_name = "evaluation_log",
    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
      env$evaluation_log <- vector("list", end_iteration - begin_iteration + 1)
      env$next_log <- 1
    },
    f_before_iter = NULL,
    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
      tmp <- .summarize.feval(iter_feval, TRUE)
      env$evaluation_log[[env$next_log]] <- list(iter = iteration, metrics = tmp$feval, sds = tmp$stdev)
      env$next_log <- env$next_log + 1
      return(FALSE)
    },
    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
      if (!NROW(env$evaluation_log)) {
        return(prev_cb_res)
      }
      # in case of early stopping
      if (env$next_log <= length(env$evaluation_log)) {
        env$evaluation_log <- head(env$evaluation_log, env$next_log - 1)
      }

      iters <- data.frame(iter = sapply(env$evaluation_log, function(x) x$iter))
      metrics <- do.call(rbind, lapply(env$evaluation_log, function(x) x$metrics))
      mnames <- gsub("-", "_", names(env$evaluation_log[[1]]$metrics), fixed = TRUE)
      colnames(metrics) <- mnames
      has_sds <- !is.null(env$evaluation_log[[1]]$sds)
      if (has_sds) {
        sds <- do.call(rbind, lapply(env$evaluation_log, function(x) x$sds))
        colnames(sds) <- mnames
        metrics <- lapply(
          mnames,
          function(metric) {
            out <- cbind(metrics[, metric], sds[, metric])
            colnames(out) <- paste0(metric, c("_mean", "_std"))
            return(out)
          }
        )
        metrics <- do.call(cbind, metrics)
      }
      evaluation_log <- cbind(iters, metrics)

      if (!is.null(prev_cb_res)) {
        if (!is.data.table(prev_cb_res)) {
          prev_cb_res <- data.table::as.data.table(prev_cb_res)
        }
        # only keep previous rows that come strictly before the new ones,
        # so that overlapping iterations are not duplicated
        prev_take <- prev_cb_res[prev_cb_res$iter < min(evaluation_log$iter)]
        if (nrow(prev_take)) {
          evaluation_log <- rbind(prev_take, evaluation_log)
        }
      }
      evaluation_log <- data.table::as.data.table(evaluation_log)
      return(evaluation_log)
    }
  )
}

#' @title Callback for resetting the booster's parameters at each iteration
#' @param new_params a list where each element corresponds to a parameter that needs to be reset.
#' Each element's value must be either a vector of values of length \code{nrounds}
#' to be set at each iteration,
#' or a function of two parameters \code{learning_rates(iteration, nrounds)}
#' which returns a new parameter value by using the current iteration number
#' and the total number of boosting rounds.
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' @details
#' Note that when training is resumed from some previous model, and a function is used to
#' reset a parameter value, the \code{nrounds} argument in this function would be the
#' number of boosting rounds in the current training.
#'
#' Does not leave any attribute in the booster.
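#' @examples
#' # A minimal sketch (illustrative data and schedule): decay the learning
#' # rate by passing one 'eta' value per boosting round.
#' data(mtcars)
#' dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
#' model <- xgb.train(
#'   data = dm,
#'   params = list(objective = "reg:squarederror", nthread = 1),
#'   nrounds = 5,
#'   callbacks = list(
#'     xgb.cb.reset.parameters(list(eta = c(0.3, 0.2, 0.1, 0.05, 0.025)))
#'   )
#' )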
#' @export
xgb.cb.reset.parameters <- function(new_params) {
  stopifnot(is.list(new_params))
  pnames <- gsub(".", "_", names(new_params), fixed = TRUE)
  not_allowed <- pnames %in%
    c('num_class', 'num_output_group', 'size_leaf_vector', 'updater_seq')
  if (any(not_allowed))
    stop('Parameters ', paste(pnames[not_allowed], collapse = ", "), " cannot be changed during boosting.")

  xgb.Callback(
    cb_name = "reset_parameters",
    env = as.environment(list(new_params = new_params)),
    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
      env$end_iteration <- end_iteration

      pnames <- gsub(".", "_", names(env$new_params), fixed = TRUE)
      for (n in pnames) {
        p <- env$new_params[[n]]
        if (is.function(p)) {
          if (length(formals(p)) != 2)
            stop("Parameter '", n, "' is a function but not of two arguments")
        } else if (is.numeric(p) || is.character(p)) {
          if (length(p) != env$end_iteration)
            stop("Length of '", n, "' has to be equal to 'nrounds'")
        } else {
          stop("Parameter '", n, "' is not a function or a vector")
        }
      }
    },
    f_before_iter = function(env, model, data, evals, iteration) {
      pars <- lapply(env$new_params, function(p) {
        if (is.function(p)) {
          return(p(iteration, env$end_iteration))
        } else {
          return(p[iteration])
        }
      })

      if (inherits(model, "xgb.Booster")) {
        xgb.parameters(model) <- pars
      } else {
        for (fd in model) {
          xgb.parameters(fd$bst) <- pars
        }
      }
      return(FALSE)
    },
    f_after_iter = NULL,
    f_after_training = NULL
  )
}

#' @title Callback to activate early stopping
#' @param stopping_rounds The number of rounds with no improvement in
#' the evaluation metric in order to stop the training.
#' @param maximize Whether to maximize the evaluation metric.
#' @param metric_name The name of an evaluation column to use as a criterion for early
#' stopping. If not set, the last column would be used.
#' Let's say the test data in \code{evals} was labelled as \code{dtest},
#' and one wants to use the AUC in test data for early stopping regardless of where
#' it is in the \code{evals}, then one of the following would need to be set:
#' \code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
#' All dash '-' characters in metric names are considered equivalent to '_'.
#' @param verbose Whether to print the early stopping information.
#' @param keep_all_iter Whether to keep all of the boosting rounds that were produced
#' in the resulting object. If passing `FALSE`, will only keep the boosting rounds
#' up to the detected best iteration, discarding the ones that come after.
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' @description
#' This callback function determines the condition for early stopping.
#'
#' The following attributes are assigned to the booster's object:
#' \itemize{
#' \item \code{best_score} the evaluation score at the best iteration
#' \item \code{best_iteration} at which boosting iteration the best score has occurred
#' (0-based index for interoperability of binary models)
#' }
#'
#' The same values are also stored as R attributes as a result of the callback, plus an additional
#' attribute `stopped_by_max_rounds` which indicates whether an early stopping by the `stopping_rounds`
#' condition occurred. Note that the `best_iteration` that is stored under R attributes will follow
#' base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
#' through \link{xgb.attr} or \link{xgb.attributes}.
#'
#' At least one dataset is required in `evals` for early stopping to work.
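#' @examples
#' # A minimal sketch (illustrative data and parameters): stop when the RMSE
#' # on the 'dtest' entry of 'evals' fails to improve for 3 rounds.
#' data(mtcars)
#' dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
#' model <- xgb.train(
#'   data = dm,
#'   params = list(objective = "reg:squarederror", nthread = 1),
#'   nrounds = 100,
#'   evals = list(dtest = dm),
#'   verbose = 0,
#'   callbacks = list(
#'     xgb.cb.early.stop(stopping_rounds = 3, metric_name = "dtest-rmse")
#'   ),
#'   keep_extra_attributes = TRUE
#' )
#' attributes(model)$best_iteration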
#' @export
xgb.cb.early.stop <- function(
  stopping_rounds,
  maximize = FALSE,
  metric_name = NULL,
  verbose = TRUE,
  keep_all_iter = TRUE
) {
  if (!is.null(metric_name)) {
    stopifnot(is.character(metric_name))
    stopifnot(length(metric_name) == 1L)
  }

  xgb.Callback(
    cb_name = "early_stop",
    env = as.environment(
      list(
        checked_evnames = FALSE,
        stopping_rounds = stopping_rounds,
        maximize = maximize,
        metric_name = metric_name,
        verbose = verbose,
        keep_all_iter = keep_all_iter,
        stopped_by_max_rounds = FALSE
      )
    ),
    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
      if (inherits(model, "xgb.Booster") && !length(evals)) {
        stop("For early stopping, 'evals' must have at least one element")
      }
      env$begin_iteration <- begin_iteration
      return(NULL)
    },
    f_before_iter = function(env, model, data, evals, iteration) NULL,
    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
      sds <- NULL
      if (NCOL(iter_feval) > 1) {
        tmp <- .summarize.feval(iter_feval, TRUE)
        iter_feval <- tmp$feval
        sds <- tmp$stdev
      }

      if (!env$checked_evnames) {

        eval_names <- gsub('-', '_', names(iter_feval), fixed = TRUE)
        if (!is.null(env$metric_name)) {
          env$metric_idx <- which(gsub('-', '_', env$metric_name, fixed = TRUE) == eval_names)
          if (length(env$metric_idx) == 0)
            stop("'metric_name' for early stopping is not one of the following:\n",
                 paste(eval_names, collapse = ' '), '\n')
        }

        if (is.null(env$metric_name)) {
          if (NROW(iter_feval) == 1) {
            env$metric_idx <- 1L
          } else {
            env$metric_idx <- length(eval_names)
            if (env$verbose)
              cat('Multiple eval metrics are present. Will use ',
                  eval_names[env$metric_idx], ' for early stopping.\n', sep = '')
          }
        }

        env$metric_name <- eval_names[env$metric_idx]

        # maximize is usually NULL when not set in xgb.train and built-in metrics
        if (is.null(env$maximize))
          env$maximize <- grepl('(_auc|_aupr|_map|_ndcg|_pre)', env$metric_name)

        if (env$verbose)
          cat("Will train until ", env$metric_name, " hasn't improved in ",
              env$stopping_rounds, " rounds.\n\n", sep = '')

        env$best_iteration <- env$begin_iteration
        if (env$maximize) {
          env$best_score <- -Inf
        } else {
          env$best_score <- Inf
        }

        if (inherits(model, "xgb.Booster")) {
          best_score <- xgb.attr(model, 'best_score')
          if (NROW(best_score)) env$best_score <- as.numeric(best_score)
          best_iteration <- xgb.attr(model, 'best_iteration')
          if (NROW(best_iteration)) env$best_iteration <- as.numeric(best_iteration) + 1
        }

        env$checked_evnames <- TRUE
      }

      score <- iter_feval[env$metric_idx]
      if ((env$maximize && score > env$best_score) ||
          (!env$maximize && score < env$best_score)) {

        env$best_score <- score
        env$best_iteration <- iteration
        # save the property to attributes, so they will occur in checkpoint
        if (inherits(model, "xgb.Booster")) {
          xgb.attributes(model) <- list(
            best_iteration = env$best_iteration - 1, # convert to 0-based index
            best_score = env$best_score
          )
        }
      } else if (iteration - env$best_iteration >= env$stopping_rounds) {
        if (env$verbose) {
          best_msg <- .format_eval_string(iteration, iter_feval, sds)
          cat("Stopping. Best iteration:\n", best_msg, "\n\n", sep = '')
        }
        env$stopped_by_max_rounds <- TRUE
        return(TRUE)
      }
      return(FALSE)
    },
    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
      if (inherits(model, "xgb.Booster") && !env$keep_all_iter && env$best_iteration < iteration) {
        # Note: it loses the attributes after being sliced,
        # so they have to be re-assigned afterwards.
        prev_attr <- xgb.attributes(model)
        if (NROW(prev_attr)) {
          suppressWarnings({
            prev_attr <- within(prev_attr, rm("best_score", "best_iteration"))
          })
        }
        .Call(XGBoosterSliceAndReplace_R, xgb.get.handle(model), 0L, env$best_iteration, 1L)
        if (NROW(prev_attr)) {
          xgb.attributes(model) <- prev_attr
        }
      }
      attrs_set <- list(best_iteration = env$best_iteration - 1, best_score = env$best_score)
      if (inherits(model, "xgb.Booster")) {
        xgb.attributes(model) <- attrs_set
      } else {
        for (fd in model) {
          xgb.attributes(fd$bst) <- attrs_set # to use in the cv.predict callback
        }
      }
      return(
        list(
          best_iteration = env$best_iteration,
          best_score = env$best_score,
          stopped_by_max_rounds = env$stopped_by_max_rounds
        )
      )
    }
  )
}

.save.model.w.formatted.name <- function(model, save_name, iteration) {
  # Note: this throws a warning if the name doesn't have anything to format through 'sprintf'
  suppressWarnings({
    save_name <- sprintf(save_name, iteration)
  })
  xgb.save(model, save_name)
}

#' @title Callback for saving a model file.
#' @param save_period Save the model to disk after every
#' \code{save_period} iterations; 0 means save the model at the end.
#' @param save_name The name or path for the saved model file.
#' It can contain a \code{\link[base]{sprintf}} formatting specifier
#' to include the integer iteration number in the file name.
#' E.g., with \code{save_name} = 'xgboost_%04d.model',
#' the file saved at iteration 50 would be named "xgboost_0050.model".
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train},
#' but \bold{not} to \link{xgb.cv}.
#' @description
#' This callback function allows saving an xgb-model file, either periodically
#' after every \code{save_period} iterations, or at the end of training.
#'
#' Does not leave any attribute in the booster.
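#' @examples
#' # A minimal sketch (illustrative data): save the fitted model at the end
#' # of training, into a temporary directory.
#' data(mtcars)
#' dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
#' fname <- file.path(tempdir(), "xgboost.ubj")
#' model <- xgb.train(
#'   data = dm,
#'   params = list(objective = "reg:squarederror", nthread = 1),
#'   nrounds = 3,
#'   callbacks = list(xgb.cb.save.model(save_name = fname))
#' )
#' file.exists(fname)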
#' @export
xgb.cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
  if (save_period < 0) {
    stop("'save_period' cannot be negative")
  }
  if (!is.character(save_name) || length(save_name) != 1L) {
    stop("'save_name' must be a single character referring to the file name.")
  }

  xgb.Callback(
    cb_name = "save_model",
    env = as.environment(list(save_period = save_period, save_name = save_name, last_save = 0)),
    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
      env$begin_iteration <- begin_iteration
    },
    f_before_iter = NULL,
    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
      if (env$save_period > 0 && (iteration - env$begin_iteration) %% env$save_period == 0) {
        .save.model.w.formatted.name(model, env$save_name, iteration)
        env$last_save <- iteration
      }
      return(FALSE)
    },
    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
      if (env$save_period == 0 && iteration > env$last_save) {
        .save.model.w.formatted.name(model, env$save_name, iteration)
      }
    }
  )
}

#' @title Callback for returning cross-validation based predictions.
#' @param save_models A flag for whether to save the folds' models.
#' @param outputmargin Whether to save margin predictions (same effect as passing this
#' parameter to \link{predict.xgb.Booster}).
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.cv},
#' but \bold{not} to \link{xgb.train}.
#' @description
#' This callback function saves predictions for all of the test folds,
#' and also allows saving the folds' models.
#' @details
#' Predictions are saved inside of the \code{pred} element, which is either a vector or a matrix,
#' depending on the number of prediction outputs per data row. The order of predictions corresponds
#' to the order of rows in the original dataset. Note that when a custom \code{folds} list is
#' provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a
#' non-overlapping list of k sets of indices, as in a standard k-fold CV. The predictions would not be
#' meaningful when user-provided folds have overlapping indices as in, e.g., random sampling splits.
#' When some of the indices in the training dataset are not included in user-provided \code{folds},
#' their prediction value would be \code{NA}.
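#' @examples
#' # A minimal sketch (illustrative data): collect out-of-fold predictions
#' # from a 3-fold CV run. The 'cv_predict' element name comes from 'cb_name'.
#' data(mtcars)
#' dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
#' res <- xgb.cv(
#'   params = list(objective = "reg:squarederror", nthread = 1),
#'   data = dm,
#'   nfold = 3,
#'   nrounds = 5,
#'   verbose = 0,
#'   callbacks = list(xgb.cb.cv.predict())
#' )
#' head(res$cv_predict$pred)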
#' @export
xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
  xgb.Callback(
    cb_name = "cv_predict",
    env = as.environment(list(save_models = save_models, outputmargin = outputmargin)),
    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
      if (inherits(model, "xgb.Booster")) {
        stop("'cv.predict' callback is only for 'xgb.cv'.")
      }
    },
    f_before_iter = NULL,
    f_after_iter = NULL,
    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
      pred <- NULL
      for (fd in model) {
        pr <- predict(
          fd$bst,
          fd$evals[[2L]],
          outputmargin = env$outputmargin,
          reshape = TRUE
        )
        if (is.null(pred)) {
          if (NCOL(pr) > 1L) {
            pred <- matrix(NA_real_, nrow(data), ncol(pr))
          } else {
            pred <- matrix(NA_real_, nrow(data))
          }
        }
        if (is.matrix(pred)) {
          pred[fd$index, ] <- pr
        } else {
          pred[fd$index] <- pr
        }
      }
      out <- list(pred = pred)
      if (env$save_models) {
        out$models <- lapply(model, function(fd) fd$bst)
      }
      return(out)
    }
  )
}

.list2mat <- function(coef_list, sparse) {
  if (sparse) {
    coef_mat <- methods::new("dgRMatrix")
    coef_mat@p <- as.integer(c(0, cumsum(sapply(coef_list, function(x) length(x@x)))))
    coef_mat@j <- as.integer(unlist(lapply(coef_list, slot, "i")) - 1L)
    coef_mat@x <- unlist(lapply(coef_list, slot, "x"))
    coef_mat@Dim <- as.integer(c(length(coef_list), length(coef_list[[1L]])))
    # Note: function 'xgb.gblinear.history' might later on try to slice by columns
    coef_mat <- methods::as(coef_mat, "CsparseMatrix")
    return(coef_mat)
  } else {
    return(unname(do.call(rbind, coef_list)))
  }
}

.extract.coef <- function(model, sparse) {
  coefs <- .internal.coef.xgb.Booster(model, add_names = FALSE)
  if (NCOL(coefs) > 1L) {
    coefs <- as.vector(coefs)
  }
  if (sparse) {
    coefs <- methods::as(coefs, "sparseVector")
  }
  return(coefs)
}

#' @title Callback for collecting coefficients history of a gblinear booster
#' @param sparse when set to `FALSE`/`TRUE`, a dense/sparse matrix is used to store the result.
#' Sparse format is useful when one expects only a subset of coefficients to be non-zero,
#' such as when using the "thrifty" feature selector with a fairly small number of top features
#' selected per iteration.
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' @details
#' To keep things fast and simple, gblinear booster does not internally store the history of linear
#' model coefficients at each boosting iteration. This callback provides a workaround for storing
#' the coefficients' path, by extracting them after each training iteration.
#'
#' This callback will construct a matrix where rows are boosting iterations and columns are
#' feature coefficients (same order as when calling \link{coef.xgb.Booster}, with the intercept
#' corresponding to the first column).
#'
#' When there is more than one coefficient per feature (e.g. multi-class classification),
#' the result will be reshaped into a vector where coefficients are arranged first by features and
#' then by class (e.g. first 1 through N coefficients will be for the first class, then
#' coefficients N+1 through 2N for the second class, and so on).
#'
#' If the result has only one coefficient per feature in the data, then the resulting matrix
#' will have column names matching with the feature names, otherwise (when there's more than
#' one coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'
#' (so e.g. column 'c1' for class '0' will be named 'c1:0').
#'
#' With \code{xgb.train}, the output is either a dense or a sparse matrix.
#' With \code{xgb.cv}, it is a list (one element per each fold) of such
#' matrices.
#'
#' Function \link{xgb.gblinear.history} provides an easy way to retrieve the
#' outputs from this callback.
#' @seealso \link{xgb.gblinear.history}, \link{coef.xgb.Booster}.
#' @examples
#' #### Binary classification:
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
#' # without considering the 2nd order interactions:
#' x <- model.matrix(Species ~ .^2, iris)[, -1]
#' colnames(x)
#' dtrain <- xgb.DMatrix(scale(x), label = 1 * (iris$Species == "versicolor"), nthread = nthread)
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
#' # For 'shotgun', which is a default linear updater, using high eta values may result in
#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
#' # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
#' bst <- xgb.train(param, dtrain, list(tr = dtrain), nrounds = 200, eta = 1.,
#'                  callbacks = list(xgb.cb.gblinear.history()))
#' # Extract the coefficients' path and plot them vs boosting iteration number:
#' coef_path <- xgb.gblinear.history(bst)
#' matplot(coef_path, type = 'l')
#'
#' # With the deterministic coordinate descent updater, it is safer to use higher learning rates.
#' # Will try the classical componentwise boosting which selects a single best feature per round:
#' bst <- xgb.train(param, dtrain, list(tr = dtrain), nrounds = 200, eta = 0.8,
#'                  updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
#'                  callbacks = list(xgb.cb.gblinear.history()))
#' matplot(xgb.gblinear.history(bst), type = 'l')
#' # Componentwise boosting is known to have a similar effect to Lasso regularization.
#' # Try experimenting with various values of top_k, eta, nrounds,
#' # as well as different feature_selectors.
#'
#' # For xgb.cv:
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
#'               callbacks = list(xgb.cb.gblinear.history()))
#' # coefficients in the CV fold #3
#' matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
#'
#' #### Multiclass classification:
#'
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
#' # For the default linear updater 'shotgun' it sometimes is helpful
#' # to use smaller eta to reduce instability
#' bst <- xgb.train(param, dtrain, list(tr = dtrain), nrounds = 50, eta = 0.5,
#'                  callbacks = list(xgb.cb.gblinear.history()))
#' # Will plot the coefficient paths separately for each class:
#' matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l')
#' matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l')
#' matplot(xgb.gblinear.history(bst, class_index = 2), type = 'l')
#'
#' # CV:
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5,
#'               callbacks = list(xgb.cb.gblinear.history(FALSE)))
#' # 1st fold of 1st class
#' matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l')
#'
#' @export
xgb.cb.gblinear.history <- function(sparse = FALSE) {
  xgb.Callback(
    cb_name = "gblinear_history",
    env = as.environment(list(sparse = sparse)),
    f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
      if (!inherits(model, "xgb.Booster")) {
        model <- model[[1L]]$bst
      }
      if (xgb.booster_type(model) != "gblinear") {
        stop("Callback 'xgb.cb.gblinear.history' is only for booster='gblinear'.")
      }
      env$coef_hist <- vector("list", end_iteration - begin_iteration + 1)
      env$next_idx <- 1
    },
    f_before_iter = NULL,
    f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
      if (inherits(model, "xgb.Booster")) {
        coef_this <- .extract.coef(model, env$sparse)
      } else {
        coef_this <- lapply(model, function(fd) .extract.coef(fd$bst, env$sparse))
      }
      env$coef_hist[[env$next_idx]] <- coef_this
      env$next_idx <- env$next_idx + 1
      return(FALSE)
    },
    f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
      # in case of early stopping
      if (env$next_idx <= length(env$coef_hist)) {
        env$coef_hist <- head(env$coef_hist, env$next_idx - 1)
      }

      is_booster <- inherits(model, "xgb.Booster")
      if (is_booster) {
        out <- .list2mat(env$coef_hist, env$sparse)
      } else {
        out <- lapply(
          X = lapply(
            X = seq_along(env$coef_hist[[1]]),
            FUN = function(i) lapply(env$coef_hist, "[[", i)
          ),
          FUN = .list2mat,
          env$sparse
        )
      }
      if (!is.null(prev_cb_res)) {
        if (is_booster) {
          out <- rbind(prev_cb_res, out)
        } else {
          # Note: this case should never be encountered, since training cannot
          # be continued from the result of xgb.cv, but this code should in
          # theory do the job if the situation were to be encountered.
          out <- lapply(
            out,
            function(lst) {
              lapply(
                seq_along(lst),
                function(i) rbind(prev_cb_res[[i]], lst[[i]])
              )
            }
          )
        }
      }
      feature_names <- getinfo(data, "feature_name")
      if (!NROW(feature_names)) {
        feature_names <- paste0("V", seq(1L, ncol(data)))
      }
      expected_ncols <- length(feature_names) + 1
      if (is_booster) {
        mat_ncols <- ncol(out)
      } else {
        mat_ncols <- ncol(out[[1L]])
      }
      if (mat_ncols %% expected_ncols == 0) {
        feature_names <- c("(Intercept)", feature_names)
        n_rep <- mat_ncols / expected_ncols
        if (n_rep > 1) {
          feature_names <- unlist(
            lapply(
              seq(1, n_rep),
              function(cl) paste(feature_names, cl - 1, sep = ":")
            )
          )
        }
        if (is_booster) {
          colnames(out) <- feature_names
        } else {
          out <- lapply(
            out,
            function(mat) {
              colnames(mat) <- feature_names
              return(mat)
            }
          )
        }
      }
      return(out)
    }
  )
}

#' @title Extract gblinear coefficients history.
#' @description A helper function to extract the matrix of linear coefficients' history
#' from a gblinear model created while using the \link{xgb.cb.gblinear.history}
#' callback (which must be added manually, as it is not used by default).
#' @details Note that this is an R-specific function that relies on R attributes that
#' are not saved when using xgboost's own serialization functions like \link{xgb.load}
#' or \link{xgb.load.raw}.
#'
#' In order for a serialized model to be accepted by this function, one must use R
#' serializers such as \link{saveRDS}.
#' @param model either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
#' using the \link{xgb.cb.gblinear.history} callback, but \bold{not} a booster
#' loaded from \link{xgb.load} or \link{xgb.load.raw}.
#' @param class_index zero-based class index to extract the coefficients for only that
#' specific class in a multinomial multiclass model. When it is NULL, all the
#' coefficients are returned. Has no effect in non-multiclass models.
#'
#' @return
#' For an \link{xgb.train} result, a matrix (either dense or sparse) with the columns
#' corresponding to iteration's coefficients and the rows corresponding to boosting iterations.
#'
#' For an \link{xgb.cv} result, a list of such matrices is returned with the elements
#' corresponding to CV folds.
#'
#' When there is more than one coefficient per feature (e.g. multi-class classification)
#' and `class_index` is not provided, the columns are arranged first by features and
#' then by class (e.g. columns 1 through N will be for the first class, then
#' columns N+1 through 2N for the second class, and so on).
#' @seealso \link{xgb.cb.gblinear.history}, \link{coef.xgb.Booster}.
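#' @examples
#' # A minimal sketch (illustrative data and parameters), mirroring the
#' # examples in ?xgb.cb.gblinear.history:
#' x <- model.matrix(Species ~ ., iris)[, -1]
#' dtrain <- xgb.DMatrix(scale(x), label = 1 * (iris$Species == "versicolor"), nthread = 1)
#' param <- list(booster = "gblinear", objective = "reg:logistic", nthread = 1)
#' bst <- xgb.train(param, dtrain, nrounds = 10,
#'                  callbacks = list(xgb.cb.gblinear.history()))
#' coef_path <- xgb.gblinear.history(bst)
#' dim(coef_path)  # one row per boosting iteration, one column per coefficient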
#' @export
xgb.gblinear.history <- function(model, class_index = NULL) {

  if (!(inherits(model, "xgb.Booster") ||
        inherits(model, "xgb.cv.synchronous")))
    stop("model must be an object of either xgb.Booster or xgb.cv.synchronous class")
  is_cv <- inherits(model, "xgb.cv.synchronous")

  if (!is_cv) {
    coef_path <- getElement(attributes(model), "gblinear_history")
  } else {
    coef_path <- getElement(model, "gblinear_history")
  }
  if (is.null(coef_path)) {
    stop("model must be trained while using the xgb.cb.gblinear.history() callback")
  }

  if (!is_cv) {
    num_class <- xgb.num_class(model)
    num_feat <- xgb.num_feature(model)
  } else {
    # in case of CV, the object is expected to have this info
    if (model$params$booster != "gblinear")
      stop("It does not appear to be a gblinear model")
    num_class <- NVL(model$params$num_class, 1)
    num_feat <- model$nfeatures
    if (is.null(num_feat))
      stop("This xgb.cv result does not have nfeatures info")
  }

  if (!is.null(class_index) &&
      num_class > 1 &&
      (class_index[1] < 0 || class_index[1] >= num_class))
    stop("class_index has to be within [0,", num_class - 1, "]")

  if (!is.null(class_index) && num_class > 1) {
    seq_take <- seq(1 + class_index * (num_feat + 1), (class_index + 1) * (num_feat + 1))
    coef_path <- if (is.list(coef_path)) {
      lapply(coef_path, function(x) x[, seq_take])
    } else {
      coef_path[, seq_take]
    }
  }
  return(coef_path)
}

.callbacks.only.train <- "save_model"
.callbacks.only.cv <- "cv_predict"

.process.callbacks <- function(callbacks, is_cv) {
  if (inherits(callbacks, "xgb.Callback")) {
    callbacks <- list(callbacks)
  }
  if (!is.list(callbacks)) {
    stop("'callbacks' must be a list.")
  }
  cb_names <- character()
  if (length(callbacks)) {
    is_callback <- sapply(callbacks, inherits, "xgb.Callback")
    if (!all(is_callback)) {
      stop("Entries in 'callbacks' must be 'xgb.Callback' objects.")
    }
    cb_names <- sapply(callbacks, function(cb) cb$cb_name)
    if (length(cb_names) != length(callbacks)) {
      stop("Passed invalid callback(s).")
    }
    if (anyDuplicated(cb_names) > 0) {
      stop("Callbacks must have unique names.")
    }
    if (is_cv) {
      if (any(.callbacks.only.train %in% cb_names)) {
        stop(
          "Passed callback(s) not supported for 'xgb.cv': ",
          paste(intersect(.callbacks.only.train, cb_names), collapse = ", ")
        )
      }
    } else {
      if (any(.callbacks.only.cv %in% cb_names)) {
        stop(
          "Passed callback(s) not supported for 'xgb.train': ",
          paste(intersect(.callbacks.only.cv, cb_names), collapse = ", ")
        )
      }
    }
    # Early stopping callback needs to be executed before the others
    if ("early_stop" %in% cb_names) {
      mask <- cb_names == "early_stop"
      callbacks <- c(list(callbacks[[which(mask)]]), callbacks[!mask])
    }
  }
  return(list(callbacks = callbacks, cb_names = cb_names))
}
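
# Illustration of the reordering above: given
#   list(xgb.cb.evaluation.log(), xgb.cb.early.stop(stopping_rounds = 3))
# .process.callbacks() moves the early-stopping callback to the front of the
# list, so the attributes it sets are visible to the remaining callbacks.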

# Note: don't try to use functions like 'append', as they will
# merge the elements of the different callbacks into a single list.
add.callback <- function(callbacks, cb, as_first_elt = FALSE) {
  if (!as_first_elt) {
    callbacks[[length(callbacks) + 1]] <- cb
    return(callbacks)
  } else {
    if (!length(callbacks)) {
      return(list(cb))
    }
    new_cb <- vector("list", length(callbacks) + 1)
    new_cb[[1]] <- cb
    new_cb[seq(2, length(new_cb))] <- callbacks
    return(new_cb)
  }
}

has.callbacks <- function(callbacks, cb_name) {
  cb_names <- sapply(callbacks, function(cb) cb$cb_name)
  return(cb_name %in% cb_names)
}