From 2c12b956da33aa9a8ae3dbcc9fe883d732cade46 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Fri, 1 Mar 2024 08:57:47 +0100 Subject: [PATCH] [R] Refactor callback structure and attributes (#9957) --- R-package/NAMESPACE | 22 +- R-package/R/callbacks.R | 1918 ++++++++++------- R-package/R/utils.R | 2 +- R-package/R/xgb.Booster.R | 35 +- R-package/R/xgb.cv.R | 138 +- R-package/R/xgb.load.R | 2 +- R-package/R/xgb.train.R | 189 +- R-package/R/xgboost.R | 7 +- R-package/man/callbacks.Rd | 37 - R-package/man/cb.early.stop.Rd | 62 - R-package/man/cb.evaluation.log.Rd | 31 - R-package/man/cb.print.evaluation.Rd | 29 - R-package/man/cb.save.model.Rd | 40 - R-package/man/xgb.Callback.Rd | 248 +++ ...{cb.cv.predict.Rd => xgb.cb.cv.predict.Rd} | 43 +- R-package/man/xgb.cb.early.stop.Rd | 55 + R-package/man/xgb.cb.evaluation.log.Rd | 24 + ....history.Rd => xgb.cb.gblinear.history.Rd} | 55 +- R-package/man/xgb.cb.print.evaluation.Rd | 25 + ...rameters.Rd => xgb.cb.reset.parameters.Rd} | 25 +- R-package/man/xgb.cb.save.model.Rd | 28 + R-package/man/xgb.cv.Rd | 26 +- R-package/man/xgb.gblinear.history.Rd | 24 +- R-package/man/xgb.load.Rd | 2 +- R-package/man/xgb.train.Rd | 34 +- R-package/src/init.c | 2 + R-package/src/xgboost_R.cc | 15 + R-package/src/xgboost_R.h | 10 + R-package/tests/testthat.R | 1 + R-package/tests/testthat/test_basic.R | 1 - R-package/tests/testthat/test_callbacks.R | 281 ++- R-package/tests/testthat/test_glm.R | 4 +- 32 files changed, 2076 insertions(+), 1339 deletions(-) delete mode 100644 R-package/man/callbacks.Rd delete mode 100644 R-package/man/cb.early.stop.Rd delete mode 100644 R-package/man/cb.evaluation.log.Rd delete mode 100644 R-package/man/cb.print.evaluation.Rd delete mode 100644 R-package/man/cb.save.model.Rd create mode 100644 R-package/man/xgb.Callback.Rd rename R-package/man/{cb.cv.predict.Rd => xgb.cb.cv.predict.Rd} (53%) create mode 100644 R-package/man/xgb.cb.early.stop.Rd create mode 100644 R-package/man/xgb.cb.evaluation.log.Rd rename R-package/man/{cb.gblinear.history.Rd => xgb.cb.gblinear.history.Rd} (63%) create mode 100644 R-package/man/xgb.cb.print.evaluation.Rd rename R-package/man/{cb.reset.parameters.Rd => xgb.cb.reset.parameters.Rd} (57%) create mode 100644 R-package/man/xgb.cb.save.model.Rd diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 580d1f873..c9e085e77 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -20,15 +20,9 @@ export("xgb.attr<-") export("xgb.attributes<-") export("xgb.config<-") export("xgb.parameters<-") -export(cb.cv.predict) -export(cb.early.stop) -export(cb.evaluation.log) -export(cb.gblinear.history) -export(cb.print.evaluation) -export(cb.reset.parameters) -export(cb.save.model) export(getinfo) export(setinfo) +export(xgb.Callback) export(xgb.DMatrix) export(xgb.DMatrix.hasinfo) export(xgb.DMatrix.save) @@ -39,6 +33,13 @@ export(xgb.QuantileDMatrix) export(xgb.QuantileDMatrix.from_iterator) export(xgb.attr) export(xgb.attributes) +export(xgb.cb.cv.predict) +export(xgb.cb.early.stop) +export(xgb.cb.evaluation.log) +export(xgb.cb.gblinear.history) +export(xgb.cb.print.evaluation) +export(xgb.cb.reset.parameters) +export(xgb.cb.save.model) export(xgb.config) export(xgb.copy.Booster) export(xgb.create.features) @@ -72,14 +73,10 @@ export(xgb.slice.DMatrix) export(xgb.train) export(xgboost) import(methods) +importClassesFrom(Matrix,CsparseMatrix) importClassesFrom(Matrix,dgCMatrix) importClassesFrom(Matrix,dgRMatrix) -importClassesFrom(Matrix,dgeMatrix) -importFrom(Matrix,colSums) 
importFrom(Matrix,sparse.model.matrix) -importFrom(Matrix,sparseMatrix) -importFrom(Matrix,sparseVector) -importFrom(Matrix,t) importFrom(data.table,":=") importFrom(data.table,as.data.table) importFrom(data.table,data.table) @@ -101,6 +98,7 @@ importFrom(methods,new) importFrom(stats,coef) importFrom(stats,median) importFrom(stats,predict) +importFrom(stats,sd) importFrom(stats,variable.names) importFrom(utils,head) importFrom(utils,object.size) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 02e0a7cd4..d768e1b9e 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -1,769 +1,392 @@ -#' Callback closures for booster training. -#' -#' These are used to perform various service tasks either during boosting iterations or at the end. -#' This approach helps to modularize many of such tasks without bloating the main training methods, -#' and it offers . -#' -#' @details -#' By default, a callback function is run after each boosting iteration. -#' An R-attribute \code{is_pre_iteration} could be set for a callback to define a pre-iteration function. -#' -#' When a callback function has \code{finalize} parameter, its finalizer part will also be run after -#' the boosting is completed. -#' -#' WARNING: side-effects!!! Be aware that these callback functions access and modify things in -#' the environment from which they are called from, which is a fairly uncommon thing to do in R. -#' -#' To write a custom callback closure, make sure you first understand the main concepts about R environments. -#' Check either R documentation on \code{\link[base]{environment}} or the -#' \href{http://adv-r.had.co.nz/Environments.html}{Environments chapter} from the "Advanced R" -#' book by Hadley Wickham. Further, the best option is to read the code of some of the existing callbacks - -#' choose ones that do something similar to what you want to achieve. Also, you would need to get familiar -#' with the objects available inside of the \code{xgb.train} and \code{xgb.cv} internal environments. -#' -#' @seealso -#' \code{\link{cb.print.evaluation}}, -#' \code{\link{cb.evaluation.log}}, -#' \code{\link{cb.reset.parameters}}, -#' \code{\link{cb.early.stop}}, -#' \code{\link{cb.save.model}}, -#' \code{\link{cb.cv.predict}}, -#' \code{\link{xgb.train}}, -#' \code{\link{xgb.cv}} -#' -#' @name callbacks -NULL +.reserved_cb_names <- c("names", "class", "call", "params", "niter", "nfeatures", "folds") -# -# Callbacks ------------------------------------------------------------------- -# - -#' Callback closure for printing the result of evaluation +#' @title XGBoost Callback Constructor +#' @description Constructor for defining the structure of callback functions that can be executed +#' at different stages of model training (before / after training, before / after each boosting +#' iteration). +#' @param cb_name Name for the callback. #' -#' @param period results would be printed every number of periods -#' @param showsd whether standard deviations should be printed (when available) +#' If the callback produces some non-NULL result (from executing the function passed under +#' `f_after_training`), that result will be added as an R attribute to the resulting booster +#' (or as a named element in the result of CV), with the attribute name specified here. #' -#' @details -#' The callback function prints the result of evaluation at every \code{period} iterations. -#' The initial and the last iteration's evaluations are always printed. +#' Names of callbacks must be unique - i.e. 
there cannot be two callbacks with the same name.
+#' @param env An environment object that will be passed to the different functions in the callback.
+#' Note that this environment will not be shared with other callbacks.
+#' @param f_before_training A function that will be executed before the training has started.
 #'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{bst_evaluation} (also \code{bst_evaluation_err} when available),
-#' \code{iteration},
-#' \code{begin_iteration},
-#' \code{end_iteration}.
+#' If passing `NULL` for this or for the other function inputs, then no function will be executed.
 #'
-#' @seealso
-#' \code{\link{callbacks}}
+#' If passing a function, it will be called with parameters supplied as non-named arguments
+#' matching the function signatures that are shown in the default value for each function argument.
+#' @param f_before_iter A function that will be executed before each boosting round.
 #'
-#' @export
-cb.print.evaluation <- function(period = 1, showsd = TRUE) {
-
-  callback <- function(env = parent.frame()) {
-    if (length(env$bst_evaluation) == 0 ||
-        period == 0 ||
-        NVL(env$rank, 0) != 0)
-      return()
-
-    i <- env$iteration
-    if ((i - 1) %% period == 0 ||
-        i == env$begin_iteration ||
-        i == env$end_iteration) {
-      stdev <- if (showsd) env$bst_evaluation_err else NULL
-      msg <- .format_eval_string(i, env$bst_evaluation, stdev)
-      cat(msg, '\n')
-    }
-  }
-  attr(callback, 'call') <- match.call()
-  attr(callback, 'name') <- 'cb.print.evaluation'
-  callback
-}
-
-
-#' Callback closure for logging the evaluation history
+#' This function can signal whether the training should be finalized or not, by outputting
+#' a value that evaluates to `TRUE` - i.e. if the output from the function provided here at
+#' a given round is `TRUE`, then training will be stopped before the current iteration happens.
 #'
-#' @details
-#' This callback function appends the current iteration evaluation results \code{bst_evaluation}
-#' available in the calling parent frame to the \code{evaluation_log} list in a calling frame.
+#' Return values of `NULL` will be interpreted as `FALSE`.
+#' @param f_after_iter A function that will be executed after each boosting round.
 #'
-#' The finalizer callback (called with \code{finalize = TURE} in the end) converts
-#' the \code{evaluation_log} list into a final data.table.
+#' This function can signal whether the training should be finalized or not, by outputting
+#' a value that evaluates to `TRUE` - i.e. if the output from the function provided here at
+#' a given round is `TRUE`, then training will be stopped at that round.
 #'
-#' The iteration evaluation result \code{bst_evaluation} must be a named numeric vector.
+#' Return values of `NULL` will be interpreted as `FALSE`.
+#' @param f_after_training A function that will be executed after training is finished.
 #'
-#' Note: in the column names of the final data.table, the dash '-' character is replaced with
-#' the underscore '_' in order to make the column names more like regular R identifiers.
+#' This function can optionally output something non-NULL, which will become part of the R
+#' attributes of the booster (assuming one passes `keep_extra_attributes=TRUE` to \link{xgb.train})
+#' under the name supplied for parameter `cb_name` in the case of \link{xgb.train}; or as part
+#' of the named elements in the result of \link{xgb.cv}.
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+#' @details Arguments that will be passed to the supplied functions are as follows:\itemize{ #' -#' Callback function expects the following values to be set in its calling frame: -#' \code{evaluation_log}, -#' \code{bst_evaluation}, -#' \code{iteration}. +#' \item env The same environment that is passed under argument `env`. #' -#' @seealso -#' \code{\link{callbacks}} +#' It may be modified by the functions in order to e.g. keep tracking of what happens +#' across iterations or similar. #' -#' @export -cb.evaluation.log <- function() { - - mnames <- NULL - - init <- function(env) { - if (!is.list(env$evaluation_log)) - stop("'evaluation_log' has to be a list") - mnames <<- names(env$bst_evaluation) - if (is.null(mnames) || any(mnames == "")) - stop("bst_evaluation must have non-empty names") - - mnames <<- gsub('-', '_', names(env$bst_evaluation), fixed = TRUE) - if (!is.null(env$bst_evaluation_err)) - mnames <<- c(paste0(mnames, '_mean'), paste0(mnames, '_std')) - } - - finalizer <- function(env) { - env$evaluation_log <- as.data.table(t(simplify2array(env$evaluation_log))) - setnames(env$evaluation_log, c('iter', mnames)) - - if (!is.null(env$bst_evaluation_err)) { - # rearrange col order from _mean,_mean,...,_std,_std,... - # to be _mean,_std,_mean,_std,... - len <- length(mnames) - means <- mnames[seq_len(len / 2)] - stds <- mnames[(len / 2 + 1):len] - cnames <- numeric(len) - cnames[c(TRUE, FALSE)] <- means - cnames[c(FALSE, TRUE)] <- stds - env$evaluation_log <- env$evaluation_log[, c('iter', cnames), with = FALSE] - } - } - - callback <- function(env = parent.frame(), finalize = FALSE) { - if (is.null(mnames)) - init(env) - - if (finalize) - return(finalizer(env)) - - ev <- env$bst_evaluation - if (!is.null(env$bst_evaluation_err)) - ev <- c(ev, env$bst_evaluation_err) - env$evaluation_log <- c(env$evaluation_log, - list(c(iter = env$iteration, ev))) - } - attr(callback, 'call') <- match.call() - attr(callback, 'name') <- 'cb.evaluation.log' - callback -} - -#' Callback closure for resetting the booster's parameters at each iteration. +#' This environment is only used by the functions supplied to the callback, and will +#' not be kept after the model fitting function terminates (see parameter `f_after_training`). #' -#' @param new_params a list where each element corresponds to a parameter that needs to be reset. -#' Each element's value must be either a vector of values of length \code{nrounds} -#' to be set at each iteration, -#' or a function of two parameters \code{learning_rates(iteration, nrounds)} -#' which returns a new parameter value by using the current iteration number -#' and the total number of boosting rounds. +#' \item model The booster object when using \link{xgb.train}, or the folds when using +#' \link{xgb.cv}. #' -#' @details -#' This is a "pre-iteration" callback function used to reset booster's parameters -#' at the beginning of each iteration. -#' -#' Note that when training is resumed from some previous model, and a function is used to -#' reset a parameter value, the \code{nrounds} argument in this function would be the -#' the number of boosting rounds in the current training. -#' -#' Callback function expects the following values to be set in its calling frame: -#' \code{bst} or \code{bst_folds}, -#' \code{iteration}, -#' \code{begin_iteration}, -#' \code{end_iteration}. 
-#'
-#' @seealso
-#' \code{\link{callbacks}}
-#'
-#' @export
-cb.reset.parameters <- function(new_params) {
-
-  if (typeof(new_params) != "list")
-    stop("'new_params' must be a list")
-  pnames <- gsub(".", "_", names(new_params), fixed = TRUE)
-  nrounds <- NULL
-
-  # run some checks in the beginning
-  init <- function(env) {
-    nrounds <<- env$end_iteration - env$begin_iteration + 1
-
-    if (is.null(env$bst) && is.null(env$bst_folds))
-      stop("Parent frame has neither 'bst' nor 'bst_folds'")
-
-    # Some parameters are not allowed to be changed,
-    # since changing them would simply wreck some chaos
-    not_allowed <- pnames %in%
-      c('num_class', 'num_output_group', 'size_leaf_vector', 'updater_seq')
-    if (any(not_allowed))
-      stop('Parameters ', paste(pnames[not_allowed]), " cannot be changed during boosting.")
-
-    for (n in pnames) {
-      p <- new_params[[n]]
-      if (is.function(p)) {
-        if (length(formals(p)) != 2)
-          stop("Parameter '", n, "' is a function but not of two arguments")
-      } else if (is.numeric(p) || is.character(p)) {
-        if (length(p) != nrounds)
-          stop("Length of '", n, "' has to be equal to 'nrounds'")
-      } else {
-        stop("Parameter '", n, "' is not a function or a vector")
-      }
-    }
-  }
-
-  callback <- function(env = parent.frame()) {
-    if (is.null(nrounds))
-      init(env)
-
-    i <- env$iteration
-    pars <- lapply(new_params, function(p) {
-      if (is.function(p))
-        return(p(i, nrounds))
-      p[i]
-    })
-
-    if (!is.null(env$bst)) {
-      xgb.parameters(env$bst) <- pars
-    } else {
-      for (fd in env$bst_folds)
-        xgb.parameters(fd$bst) <- pars
-    }
-  }
-  attr(callback, 'is_pre_iteration') <- TRUE
-  attr(callback, 'call') <- match.call()
-  attr(callback, 'name') <- 'cb.reset.parameters'
-  callback
-}
-
-
-#' Callback closure to activate the early stopping.
-#'
-#' @param stopping_rounds The number of rounds with no improvement in
-#' the evaluation metric in order to stop the training.
-#' @param maximize whether to maximize the evaluation metric
-#' @param metric_name the name of an evaluation column to use as a criteria for early
-#' stopping. If not set, the last column would be used.
-#' Let's say the test data in \code{watchlist} was labelled as \code{dtest},
-#' and one wants to use the AUC in test data for early stopping regardless of where
-#' it is in the \code{watchlist}, then one of the following would need to be set:
-#' \code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
-#' All dash '-' characters in metric names are considered equivalent to '_'.
-#' @param verbose whether to print the early stopping information.
-#'
-#' @details
-#' This callback function determines the condition for early stopping
-#' by setting the \code{stop_condition = TRUE} flag in its calling frame.
-#'
-#' The following additional fields are assigned to the model's R object:
-#' \itemize{
-#' \item \code{best_score} the evaluation score at the best iteration
-#' \item \code{best_iteration} at which boosting iteration the best score has occurred (1-based index)
-#' }
-#' The Same values are also stored as xgb-attributes:
-#' \itemize{
-#' \item \code{best_iteration} is stored as a 0-based iteration index (for interoperability of binary models)
-#' \item \code{best_msg} message string is also stored.
+#' \item model The booster object when using \link{xgb.train}, or the folds when using
+#' \link{xgb.cv}.
 #'
+#' For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
+#' \item `dtrain`: The training data for the fold (as an `xgb.DMatrix` object).
+#' \item `bst`: The `xgb.Booster` object for the fold.
+#' \item `watchlist`: A list with two DMatrices, with names `train` and `test` +#' (`test` is the held-out data for the fold). +#' \item `index`: The indices of the hold-out data for that fold (base-1 indexing), +#' from which the `test` entry in the watchlist was obtained. #' } #' -#' At least one data element is required in the evaluation watchlist for early stopping to work. +#' This object should \bold{not} be in-place modified in ways that conflict with the +#' training (e.g. resetting the parameters for a training update in a way that resets +#' the number of rounds to zero in order to overwrite rounds). #' -#' Callback function expects the following values to be set in its calling frame: -#' \code{stop_condition}, -#' \code{bst_evaluation}, -#' \code{rank}, -#' \code{bst} (or \code{bst_folds} and \code{basket}), -#' \code{iteration}, -#' \code{begin_iteration}, -#' \code{end_iteration}, +#' Note that any R attributes that are assigned to the booster during the callback functions, +#' will not be kept thereafter as the booster object variable is not re-assigned during +#' training. It is however possible to set C-level attributes of the booster through +#' \link{xgb.attr} or \link{xgb.attributes}, which should remain available for the rest +#' of the iterations and after the training is done. #' -#' @seealso -#' \code{\link{callbacks}}, -#' \code{\link{xgb.attr}} +#' For keeping variables across iterations, it's recommended to use `env` instead. +#' \item data The data to which the model is being fit, as an `xgb.DMatrix` object. #' -#' @export -cb.early.stop <- function(stopping_rounds, maximize = FALSE, - metric_name = NULL, verbose = TRUE) { - # state variables - best_iteration <- -1 - best_score <- Inf - best_msg <- NULL - metric_idx <- 1 - - init <- function(env) { - if (length(env$bst_evaluation) == 0) - stop("For early stopping, watchlist must have at least one element") - - eval_names <- gsub('-', '_', names(env$bst_evaluation), fixed = TRUE) - if (!is.null(metric_name)) { - metric_idx <<- which(gsub('-', '_', metric_name, fixed = TRUE) == eval_names) - if (length(metric_idx) == 0) - stop("'metric_name' for early stopping is not one of the following:\n", - paste(eval_names, collapse = ' '), '\n') - } - if (is.null(metric_name) && - length(env$bst_evaluation) > 1) { - metric_idx <<- length(eval_names) - if (verbose) - cat('Multiple eval metrics are present. 
Will use ', - eval_names[metric_idx], ' for early stopping.\n', sep = '') - } - - metric_name <<- eval_names[metric_idx] - - # maximize is usually NULL when not set in xgb.train and built-in metrics - if (is.null(maximize)) - maximize <<- grepl('(_auc|_map|_ndcg|_pre)', metric_name) - - if (verbose && NVL(env$rank, 0) == 0) - cat("Will train until ", metric_name, " hasn't improved in ", - stopping_rounds, " rounds.\n\n", sep = '') - - best_iteration <<- 1 - if (maximize) best_score <<- -Inf - - env$stop_condition <- FALSE - - if (!is.null(env$bst)) { - if (!inherits(env$bst, 'xgb.Booster')) - stop("'bst' in the parent frame must be an 'xgb.Booster'") - if (!is.null(best_score <- xgb.attr(env$bst, 'best_score'))) { - best_score <<- as.numeric(best_score) - best_iteration <<- as.numeric(xgb.attr(env$bst, 'best_iteration')) + 1 - best_msg <<- as.numeric(xgb.attr(env$bst, 'best_msg')) - } else { - xgb.attributes(env$bst) <- list(best_iteration = best_iteration - 1, - best_score = best_score) - } - } else if (is.null(env$bst_folds) || is.null(env$basket)) { - stop("Parent frame has neither 'bst' nor ('bst_folds' and 'basket')") - } - } - - finalizer <- function(env) { - if (!is.null(env$bst)) { - attr_best_score <- as.numeric(xgb.attr(env$bst, 'best_score')) - if (best_score != attr_best_score) { - # If the difference is too big, throw an error - if (abs(best_score - attr_best_score) >= 1e-14) { - stop("Inconsistent 'best_score' values between the closure state: ", best_score, - " and the xgb.attr: ", attr_best_score) - } - # If the difference is due to floating-point truncation, update best_score - best_score <- attr_best_score - } - xgb.attr(env$bst, "best_iteration") <- best_iteration - 1 - xgb.attr(env$bst, "best_score") <- best_score - } else { - env$basket$best_iteration <- best_iteration - } - } - - callback <- function(env = parent.frame(), finalize = FALSE) { - if (best_iteration < 0) - init(env) - - if (finalize) - return(finalizer(env)) - - i <- env$iteration - score <- env$bst_evaluation[metric_idx] - - if ((maximize && score > best_score) || - (!maximize && score < best_score)) { - - best_msg <<- .format_eval_string( - i, env$bst_evaluation, env$bst_evaluation_err - ) - best_score <<- score - best_iteration <<- i - # save the property to attributes, so they will occur in checkpoint - if (!is.null(env$bst)) { - xgb.attributes(env$bst) <- list( - best_iteration = best_iteration - 1, # convert to 0-based index - best_score = best_score, - best_msg = best_msg - ) - } - } else if (i - best_iteration >= stopping_rounds) { - env$stop_condition <- TRUE - env$end_iteration <- i - if (verbose && NVL(env$rank, 0) == 0) - cat("Stopping. Best iteration:\n", best_msg, "\n\n", sep = '') - } - } - attr(callback, 'call') <- match.call() - attr(callback, 'name') <- 'cb.early.stop' - callback -} - - -#' Callback closure for saving a model file. +#' Note that, for \link{xgb.cv}, this will be the full data, while data for the specific +#' folds can be found in the `model` object. #' -#' @param save_period save the model to disk after every -#' \code{save_period} iterations; 0 means save the model at the end. -#' @param save_name the name or path for the saved model file. +#' \item watchlist The evaluation watchlist, as passed under argument `watchlist` to +#' \link{xgb.train}. #' -#' Note that the format of the model being saved is determined by the file -#' extension specified here (see \link{xgb.save} for details about how it works). +#' For \link{xgb.cv}, this will always be `NULL`. 
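A minimal sketch of what the `model` argument described above implies for custom callbacks that need to work under both `xgb.train()` and `xgb.cv()`: the body can branch on the object's class, as the built-in callbacks in this patch do. The `count_rounds` name is hypothetical, and `xgb.get.num.boosted.rounds()` is assumed to be available in this version of the package:

```r
library(xgboost)

# Hypothetical 'f_after_iter' body: normalize 'model' into a list of boosters,
# whether it came from xgb.train() (a single xgb.Booster) or from xgb.cv()
# (a list of folds, each carrying its booster under 'bst').
count_rounds <- function(env, model, data, watchlist, iteration, iter_feval) {
  if (inherits(model, "xgb.Booster")) {
    boosters <- list(model)
  } else {
    boosters <- lapply(model, function(fd) fd$bst)
  }
  # Keep per-booster round counts in the callback's private environment
  # (assumption: xgb.get.num.boosted.rounds() exists at this point in history)
  env$n_rounds <- sapply(boosters, xgb.get.num.boosted.rounds)
  return(FALSE)  # never request early termination
}
```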
 #'
-#' It can contain a \code{\link[base]{sprintf}} formatting specifier
-#' to include the integer iteration number in the file name.
-#' E.g., with \code{save_name} = 'xgboost_%04d.ubj',
-#' the file saved at iteration 50 would be named "xgboost_0050.ubj".
-#' @seealso \link{xgb.save}
-#' @details
-#' This callback function allows to save an xgb-model file, either periodically after each \code{save_period}'s or at the end.
+#' \item begin_iteration Index of the first boosting iteration that will be executed
+#' (base-1 indexing).
 #'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{bst},
-#' \code{iteration},
-#' \code{begin_iteration},
-#' \code{end_iteration}.
+#' This will typically be '1', but when using training continuation, depending on the
+#' parameters for updates, boosting rounds will be continued from where the previous
+#' model ended, in which case this will be larger than 1.
 #'
-#' @seealso
-#' \code{\link{callbacks}}
+#' \item end_iteration Index of the last boosting iteration that will be executed
+#' (base-1 indexing, inclusive of this end).
 #'
-#' @export
-cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
-
-  if (save_period < 0)
-    stop("'save_period' cannot be negative")
-
-  callback <- function(env = parent.frame()) {
-    if (is.null(env$bst))
-      stop("'save_model' callback requires the 'bst' booster object in its calling frame")
-
-    if ((save_period > 0 && (env$iteration - env$begin_iteration) %% save_period == 0) ||
-        (save_period == 0 && env$iteration == env$end_iteration)) {
-      # Note: this throws a warning if the name doesn't have anything to format through 'sprintf'
-      suppressWarnings({
-        save_name <- sprintf(save_name, env$iteration)
-      })
-      xgb.save(env$bst, save_name)
-    }
-  }
-  attr(callback, 'call') <- match.call()
-  attr(callback, 'name') <- 'cb.save.model'
-  callback
-}
-
-
-#' Callback closure for returning cross-validation based predictions.
+#' It should match the `nrounds` argument passed to \link{xgb.train} or \link{xgb.cv}.
 #'
-#' @param save_models a flag for whether to save the folds' models.
+#' Note that boosting might be interrupted before reaching this last iteration, for
+#' example by using the early stopping callback \link{xgb.cb.early.stop}.
 #'
-#' @details
-#' This callback function saves predictions for all of the test folds,
-#' and also allows to save the folds' models.
+#' \item iteration Index of the iteration number that is being executed (the first iteration
+#' will be the same as parameter `begin_iteration`, the next one will add +1, and so on).
 #'
-#' It is a "finalizer" callback and it uses early stopping information whenever it is available,
-#' thus it must be run after the early stopping callback if the early stopping is used.
+#' \item iter_feval Evaluation metrics for the `watchlist` that was supplied, either
+#' determined by the objective, or by parameter `feval`.
 #'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{bst_folds},
-#' \code{basket},
-#' \code{data},
-#' \code{end_iteration},
-#' \code{params},
+#' For \link{xgb.train}, this will be a named vector with one entry per element in
+#' `watchlist`, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
+#' example, if `watchlist` contains an entry named "tr" and the metric is "rmse",
+#' this will be a one-element vector with name "tr-rmse".
#' -#' @return -#' Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix, -#' depending on the number of prediction outputs per data row. The order of predictions corresponds -#' to the order of rows in the original dataset. Note that when a custom \code{folds} list is -#' provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a -#' non-overlapping list of k sets of indices, as in a standard k-fold CV. The predictions would not be -#' meaningful when user-provided folds have overlapping indices as in, e.g., random sampling splits. -#' When some of the indices in the training dataset are not included into user-provided \code{folds}, -#' their prediction value would be \code{NA}. +#' For \link{xgb.cv}, this will be a 2d matrix with dimensions `[length(watchlist), nfolds]`, +#' where the row names will follow the same naming logic as the one-dimensional vector +#' that is passed in \link{xgb.train}. #' -#' @seealso -#' \code{\link{callbacks}} +#' Note that, internally, the built-in callbacks such as \link{xgb.cb.print.evaluation} summarize +#' this table by calculating the row-wise means and standard deviations. #' -#' @export -cb.cv.predict <- function(save_models = FALSE) { - - finalizer <- function(env) { - if (is.null(env$basket) || is.null(env$bst_folds)) - stop("'cb.cv.predict' callback requires 'basket' and 'bst_folds' lists in its calling frame") - - N <- nrow(env$data) - pred <- NULL - - iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration)) - if (NVL(env$params[['booster']], '') == 'gblinear') { - iterationrange <- "all" - } - for (fd in env$bst_folds) { - pr <- predict(fd$bst, fd$watchlist[[2]], iterationrange = iterationrange, reshape = TRUE) - if (is.null(pred)) { - if (NCOL(pr) > 1L) { - pred <- matrix(NA_real_, N, ncol(pr)) - } else { - pred <- matrix(NA_real_, N) - } - } - if (is.matrix(pred)) { - pred[fd$index, ] <- pr - } else { - pred[fd$index] <- pr - } - } - env$basket$pred <- pred - if (save_models) { - env$basket$models <- lapply(env$bst_folds, function(fd) { - return(fd$bst) - }) - } - } - - callback <- function(env = parent.frame(), finalize = FALSE) { - if (finalize) - return(finalizer(env)) - } - attr(callback, 'call') <- match.call() - attr(callback, 'name') <- 'cb.cv.predict' - callback -} - - -#' Callback closure for collecting the model coefficients history of a gblinear booster -#' during its training. +#' \item final_feval The evaluation results after the last boosting round is executed +#' (same format as `iter_feval`, and will be the exact same input as passed under +#' `iter_feval` to the last round that is executed during model fitting). #' -#' @param sparse when set to FALSE/TRUE, a dense/sparse matrix is used to store the result. -#' Sparse format is useful when one expects only a subset of coefficients to be non-zero, -#' when using the "thrifty" feature selector with fairly small number of top features -#' selected per iteration. +#' \item prev_cb_res Result from a previous run of a callback sharing the same name +#' (as given by parameter `cb_name`) when conducting training continuation, if there +#' was any in the booster R attributes. #' -#' @details -#' To keep things fast and simple, gblinear booster does not internally store the history of linear -#' model coefficients at each boosting iteration. This callback provides a workaround for storing -#' the coefficients' path, by extracting them after each training iteration. 
+#' Sometimes, one might want to append the new results to the previous ones, and this will
+#' be done automatically by the built-in callbacks such as \link{xgb.cb.evaluation.log},
+#' which will append the new rows to the previous table.
 #'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{bst} (or \code{bst_folds}).
+#' If no such previous callback result is available (which will never be the case when
+#' fitting a model from scratch instead of updating an existing model), this will be `NULL`.
 #'
-#' @return
-#' Results are stored in the \code{coefs} element of the closure.
-#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy
-#' way to access it.
-#' With \code{xgb.train}, it is either a dense of a sparse matrix.
-#' While with \code{xgb.cv}, it is a list (an element per each fold) of such
-#' matrices.
+#' For \link{xgb.cv}, which doesn't support training continuation, this will always be `NULL`.
+#' }
 #'
-#' @seealso
-#' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
+#' The following names (`cb_name` values) are reserved for internal callbacks:\itemize{
+#' \item print_evaluation
+#' \item evaluation_log
+#' \item reset_parameters
+#' \item early_stop
+#' \item save_model
+#' \item cv_predict
+#' \item gblinear_history
+#' }
 #'
+#' The following names are reserved for other non-callback attributes:\itemize{
+#' \item names
+#' \item class
+#' \item call
+#' \item params
+#' \item niter
+#' \item nfeatures
+#' \item folds
+#' }
+#'
+#' When using the built-in early stopping callback (\link{xgb.cb.early.stop}), said callback
+#' will always be executed before the others, as it sets some booster C-level attributes
+#' that other callbacks might also use. Otherwise, the order of execution will match
+#' the order in which the callbacks are passed to the model fitting function.
+#' @seealso Built-in callbacks:\itemize{
+#' \item \link{xgb.cb.print.evaluation}
+#' \item \link{xgb.cb.evaluation.log}
+#' \item \link{xgb.cb.reset.parameters}
+#' \item \link{xgb.cb.early.stop}
+#' \item \link{xgb.cb.save.model}
+#' \item \link{xgb.cb.cv.predict}
+#' \item \link{xgb.cb.gblinear.history}
+#' }
 #' @examples
-#' #### Binary classification:
+#' # Example constructing a custom callback that calculates
+#' # squared error on the training data, without a watchlist,
+#' # and outputs the per-iteration results.
+#' ssq_callback <- xgb.Callback(
+#'   cb_name = "ssq",
+#'   f_before_training = function(env, model, data, watchlist,
+#'                                begin_iteration, end_iteration) {
+#'     # A vector to keep track of a number at each iteration
+#'     env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
+#'   },
+#'   f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+#'     # This calculates the sum of squared errors on the training data.
+#'     # Note that this can be better done by passing a 'watchlist' entry,
+#'     # but this demonstrates a way in which callbacks can be structured.
+#' pred <- predict(model, data) +#' err <- pred - getinfo(data, "label") +#' sq_err <- sum(err^2) +#' env$logs[iteration] <- sq_err +#' cat( +#' sprintf( +#' "Squared error at iteration %d: %.2f\n", +#' iteration, sq_err +#' ) +#' ) #' -#' ## Keep the number of threads to 1 for examples -#' nthread <- 1 -#' data.table::setDTthreads(nthread) +#' # A return value of 'TRUE' here would signal to finalize the training +#' return(FALSE) +#' }, +#' f_after_training = function(env, model, data, watchlist, iteration, +#' final_feval, prev_cb_res) { +#' return(env$logs) +#' } +#' ) #' -#' # In the iris dataset, it is hard to linearly separate Versicolor class from the rest -#' # without considering the 2nd order interactions: -#' x <- model.matrix(Species ~ .^2, iris)[,-1] -#' colnames(x) -#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread) -#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc", -#' lambda = 0.0003, alpha = 0.0003, nthread = nthread) -#' # For 'shotgun', which is a default linear updater, using high eta values may result in -#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning -#' # rate does not break the convergence, but allows us to illustrate the typical pattern of -#' # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations. -#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 1., -#' callbacks = list(cb.gblinear.history())) -#' # Extract the coefficients' path and plot them vs boosting iteration number: -#' coef_path <- xgb.gblinear.history(bst) -#' matplot(coef_path, type = 'l') -#' -#' # With the deterministic coordinate descent updater, it is safer to use higher learning rates. -#' # Will try the classical componentwise boosting which selects a single best feature per round: -#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8, -#' updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1, -#' callbacks = list(cb.gblinear.history())) -#' matplot(xgb.gblinear.history(bst), type = 'l') -#' # Componentwise boosting is known to have similar effect to Lasso regularization. -#' # Try experimenting with various values of top_k, eta, nrounds, -#' # as well as different feature_selectors. 
-#' -#' # For xgb.cv: -#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8, -#' callbacks = list(cb.gblinear.history())) -#' # coefficients in the CV fold #3 -#' matplot(xgb.gblinear.history(bst)[[3]], type = 'l') -#' -#' -#' #### Multiclass classification: -#' # -#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread) -#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3, -#' lambda = 0.0003, alpha = 0.0003, nthread = nthread) -#' # For the default linear updater 'shotgun' it sometimes is helpful -#' # to use smaller eta to reduce instability -#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5, -#' callbacks = list(cb.gblinear.history())) -#' # Will plot the coefficient paths separately for each class: -#' matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l') -#' matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l') -#' matplot(xgb.gblinear.history(bst, class_index = 2), type = 'l') -#' -#' # CV: -#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5, -#' callbacks = list(cb.gblinear.history(FALSE))) -#' # 1st fold of 1st class -#' matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l') +#' data(mtcars) +#' y <- mtcars$mpg +#' x <- as.matrix(mtcars[, -1]) +#' dm <- xgb.DMatrix(x, label = y, nthread = 1) +#' model <- xgb.train( +#' data = dm, +#' params = list(objective = "reg:squarederror", nthread = 1), +#' nrounds = 5, +#' callbacks = list(ssq_callback), +#' keep_extra_attributes = TRUE +#' ) #' +#' # Result from 'f_after_iter' will be available as an attribute +#' attributes(model)$ssq #' @export -cb.gblinear.history <- function(sparse = FALSE) { - coefs <- NULL +xgb.Callback <- function( + cb_name = "custom_callback", + env = new.env(), + f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) NULL, + f_before_iter = function(env, model, data, watchlist, iteration) NULL, + f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL, + f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) NULL +) { + stopifnot(is.null(f_before_training) || is.function(f_before_training)) + stopifnot(is.null(f_before_iter) || is.function(f_before_iter)) + stopifnot(is.null(f_after_iter) || is.function(f_after_iter)) + stopifnot(is.null(f_after_training) || is.function(f_after_training)) + stopifnot(is.character(cb_name) && length(cb_name) == 1) - init <- function(env) { - # xgb.train(): bst will be present - # xgb.cv(): bst_folds will be present - if (is.null(env$bst) && is.null(env$bst_folds)) { - stop("Parent frame has neither 'bst' nor 'bst_folds'") - } + if (cb_name %in% .reserved_cb_names) { + stop("Cannot use reserved callback name '", cb_name, "'.") } - # convert from list to (sparse) matrix - list2mat <- function(coef_list) { - if (sparse) { - coef_mat <- sparseMatrix(x = unlist(lapply(coef_list, slot, "x")), - i = unlist(lapply(coef_list, slot, "i")), - p = c(0, cumsum(sapply(coef_list, function(x) length(x@x)))), - dims = c(length(coef_list[[1]]), length(coef_list))) - return(t(coef_mat)) - } else { - return(do.call(rbind, coef_list)) - } - } + out <- list( + cb_name = cb_name, + env = env, + f_before_training = f_before_training, + f_before_iter = f_before_iter, + f_after_iter = f_after_iter, + f_after_training = f_after_training + ) + class(out) <- "xgb.Callback" + return(out) +} - finalizer <- function(env) { - if (length(coefs) == 0) - return() - 
if (!is.null(env$bst)) { # # xgb.train: - coefs <<- list2mat(coefs) - } else { # xgb.cv: - # second lapply transposes the list - coefs <<- lapply( - X = lapply( - X = seq_along(coefs[[1]]), - FUN = function(i) lapply(coefs, "[[", i) - ), - FUN = list2mat +.execute.cb.before.training <- function( + callbacks, + model, + data, + watchlist, + begin_iteration, + end_iteration +) { + for (callback in callbacks) { + if (!is.null(callback$f_before_training)) { + callback$f_before_training( + callback$env, + model, + data, + watchlist, + begin_iteration, + end_iteration ) } } - - extract.coef <- function(env) { - if (!is.null(env$bst)) { # # xgb.train: - cf <- as.numeric(grep('(booster|bias|weigh)', xgb.dump(env$bst), invert = TRUE, value = TRUE)) - if (sparse) cf <- as(cf, "sparseVector") - } else { # xgb.cv: - cf <- vector("list", length(env$bst_folds)) - for (i in seq_along(env$bst_folds)) { - dmp <- xgb.dump(env$bst_folds[[i]]$bst) - cf[[i]] <- as.numeric(grep('(booster|bias|weigh)', dmp, invert = TRUE, value = TRUE)) - if (sparse) cf[[i]] <- as(cf[[i]], "sparseVector") - } - } - cf - } - - callback <- function(env = parent.frame(), finalize = FALSE) { - if (is.null(coefs)) init(env) - if (finalize) return(finalizer(env)) - cf <- extract.coef(env) - coefs <<- c(coefs, list(cf)) - } - - attr(callback, 'call') <- match.call() - attr(callback, 'name') <- 'cb.gblinear.history' - callback } -#' @title Extract gblinear coefficients history. -#' @description A helper function to extract the matrix of linear coefficients' history -#' from a gblinear model created while using the \code{cb.gblinear.history()} -#' callback. -#' @details Note that this is an R-specific function that relies on R attributes that -#' are not saved when using xgboost's own serialization functions like \link{xgb.load} -#' or \link{xgb.load.raw}. -#' -#' In order for a serialized model to be accepted by tgis function, one must use R -#' serializers such as \link{saveRDS}. -#' @param model either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained -#' using the \code{cb.gblinear.history()} callback, but \bold{not} a booster -#' loaded from \link{xgb.load} or \link{xgb.load.raw}. -#' @param class_index zero-based class index to extract the coefficients for only that -#' specific class in a multinomial multiclass model. When it is NULL, all the -#' coefficients are returned. Has no effect in non-multiclass models. -#' -#' @return -#' For an \code{xgb.train} result, a matrix (either dense or sparse) with the columns -#' corresponding to iteration's coefficients (in the order as \code{xgb.dump()} would -#' return) and the rows corresponding to boosting iterations. -#' -#' For an \code{xgb.cv} result, a list of such matrices is returned with the elements -#' corresponding to CV folds. 
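For orientation, the `.execute.cb.*` helpers introduced in this file (`.execute.cb.before.training` above, plus `.execute.cb.before.iter`, `.execute.cb.after.iter`, and `.execute.cb.after.training` below) are meant to be driven by the fitting functions. The following is a rough, simplified sketch of that dispatch pattern, with `run_boosting_loop` as a hypothetical name; it is not the actual `xgb.train()` internals:

```r
# Simplified driver loop: callbacks run once before training, around each
# boosting round (either per-round hook may request a stop by returning TRUE),
# and once after training.
run_boosting_loop <- function(callbacks, model, data, watchlist, nrounds) {
  .execute.cb.before.training(callbacks, model, data, watchlist, 1, nrounds)
  iter_feval <- NULL
  for (iteration in seq_len(nrounds)) {
    if (.execute.cb.before.iter(callbacks, model, data, watchlist, iteration)) break
    # ... one boosting update plus metric evaluation would happen here,
    # producing 'iter_feval' ...
    if (.execute.cb.after.iter(callbacks, model, data, watchlist,
                               iteration, iter_feval)) break
  }
  # Non-NULL results from 'f_after_training' hooks become R attributes of the
  # booster (or named elements of the CV result)
  .execute.cb.after.training(callbacks, model, data, watchlist,
                             iteration, iter_feval, NULL)
}
```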
-#' -#' @export -xgb.gblinear.history <- function(model, class_index = NULL) { - - if (!(inherits(model, "xgb.Booster") || - inherits(model, "xgb.cv.synchronous"))) - stop("model must be an object of either xgb.Booster or xgb.cv.synchronous class") - is_cv <- inherits(model, "xgb.cv.synchronous") - - if (is_cv) { - callbacks <- model$callbacks - } else { - callbacks <- attributes(model)$callbacks +.execute.cb.before.iter <- function( + callbacks, + model, + data, + watchlist, + iteration +) { + if (!length(callbacks)) { + return(FALSE) } + out <- sapply(callbacks, function(cb) { + if (is.null(cb$f_before_iter)) { + return(FALSE) + } + should_stop <- cb$f_before_iter( + cb$env, + model, + data, + watchlist, + iteration + ) + if (!NROW(should_stop)) { + should_stop <- FALSE + } else if (NROW(should_stop) > 1) { + should_stop <- head(as.logical(should_stop), 1) + } + return(should_stop) + }) + return(any(out)) +} - if (is.null(callbacks) || is.null(callbacks$cb.gblinear.history)) - stop("model must be trained while using the cb.gblinear.history() callback") - - if (!is_cv) { - num_class <- xgb.num_class(model) - num_feat <- xgb.num_feature(model) - } else { - # in case of CV, the object is expected to have this info - if (model$params$booster != "gblinear") - stop("It does not appear to be a gblinear model") - num_class <- NVL(model$params$num_class, 1) - num_feat <- model$nfeatures - if (is.null(num_feat)) - stop("This xgb.cv result does not have nfeatures info") +.execute.cb.after.iter <- function( + callbacks, + model, + data, + watchlist, + iteration, + iter_feval +) { + if (!length(callbacks)) { + return(FALSE) } + out <- sapply(callbacks, function(cb) { + if (is.null(cb$f_after_iter)) { + return(FALSE) + } + should_stop <- cb$f_after_iter( + cb$env, + model, + data, + watchlist, + iteration, + iter_feval + ) + if (!NROW(should_stop)) { + should_stop <- FALSE + } else if (NROW(should_stop) > 1) { + should_stop <- head(as.logical(should_stop), 1) + } + return(should_stop) + }) + return(any(out)) +} - if (!is.null(class_index) && - num_class > 1 && - (class_index[1] < 0 || class_index[1] >= num_class)) - stop("class_index has to be within [0,", num_class - 1, "]") - - coef_path <- environment(callbacks$cb.gblinear.history)[["coefs"]] - if (!is.null(class_index) && num_class > 1) { - coef_path <- if (is.list(coef_path)) { - lapply(coef_path, - function(x) x[, seq(1 + class_index, by = num_class, length.out = num_feat)]) +.execute.cb.after.training <- function( + callbacks, + model, + data, + watchlist, + iteration, + final_feval, + prev_cb_res +) { + if (!length(callbacks)) { + return(NULL) + } + old_cb_res <- attributes(model) + out <- lapply(callbacks, function(cb) { + if (is.null(cb$f_after_training)) { + return(NULL) } else { - coef_path <- coef_path[, seq(1 + class_index, by = num_class, length.out = num_feat)] + return( + cb$f_after_training( + cb$env, + model, + data, + watchlist, + iteration, + final_feval, + getElement(old_cb_res, cb$cb_name) + ) + ) } + }) + names(out) <- sapply(callbacks, function(cb) cb$cb_name) + if (NROW(out)) { + out <- out[!sapply(out, is.null)] } - coef_path + return(out) } +.summarize.feval <- function(iter_feval, showsd) { + if (NCOL(iter_feval) > 1L && showsd) { + stdev <- apply(iter_feval, 1, sd) + } else { + stdev <- NULL + } + if (NCOL(iter_feval) > 1L) { + iter_feval <- rowMeans(iter_feval) + } + return(list(feval = iter_feval, stdev = stdev)) +} -# -# Internal utility functions for callbacks ------------------------------------ -# +.print.evaluation 
<- function(iter_feval, showsd, iteration) { + tmp <- .summarize.feval(iter_feval, showsd) + msg <- .format_eval_string(iteration, tmp$feval, tmp$stdev) + cat(msg, '\n') +} # Format the evaluation metric string .format_eval_string <- function(iter, eval_res, eval_err = NULL) { @@ -784,69 +407,838 @@ xgb.gblinear.history <- function(model, class_index = NULL) { return(paste0(iter, res)) } -# Extract callback names from the list of callbacks -callback.names <- function(cb_list) { - unlist(lapply(cb_list, function(x) attr(x, 'name'))) -} - -# Extract callback calls from the list of callbacks -callback.calls <- function(cb_list) { - unlist(lapply(cb_list, function(x) attr(x, 'call'))) -} - -# Add a callback cb to the list and make sure that -# cb.early.stop and cb.cv.predict are at the end of the list -# with cb.cv.predict being the last (when present) -add.cb <- function(cb_list, cb) { - cb_list <- c(cb_list, cb) - names(cb_list) <- callback.names(cb_list) - if ('cb.early.stop' %in% names(cb_list)) { - cb_list <- c(cb_list, cb_list['cb.early.stop']) - # this removes only the first one - cb_list['cb.early.stop'] <- NULL +#' @title Callback for printing the result of evaluation +#' @param period results would be printed every number of periods +#' @param showsd whether standard deviations should be printed (when available) +#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}. +#' @description +#' The callback function prints the result of evaluation at every \code{period} iterations. +#' The initial and the last iteration's evaluations are always printed. +#' +#' Does not leave any attribute in the booster (see \link{xgb.cb.evaluation.log} for that). +#' @seealso \link{xgb.Callback} +#' @export +xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) { + if (length(period) != 1 || period != floor(period) || period < 1) { + stop("'period' must be a positive integer.") } - if ('cb.cv.predict' %in% names(cb_list)) { - cb_list <- c(cb_list, cb_list['cb.cv.predict']) - cb_list['cb.cv.predict'] <- NULL - } - cb_list -} -# Sort callbacks list into categories -categorize.callbacks <- function(cb_list) { - list( - pre_iter = Filter(function(x) { - pre <- attr(x, 'is_pre_iteration') - !is.null(pre) && pre - }, cb_list), - post_iter = Filter(function(x) { - pre <- attr(x, 'is_pre_iteration') - is.null(pre) || !pre - }, cb_list), - finalize = Filter(function(x) { - 'finalize' %in% names(formals(x)) - }, cb_list) + xgb.Callback( + cb_name = "print_evaluation", + env = as.environment(list(period = period, showsd = showsd, is_first_call = TRUE)), + f_before_training = NULL, + f_before_iter = NULL, + f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) { + if (is.null(iter_feval)) { + return(FALSE) + } + if (env$is_first_call || (iteration - 1) %% env$period == 0) { + .print.evaluation(iter_feval, env$showsd, iteration) + env$last_printed_iter <- iteration + } + env$is_first_call <- FALSE + return(FALSE) + }, + f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) { + if (is.null(final_feval)) { + return(NULL) + } + if (is.null(env$last_printed_iter) || iteration > env$last_printed_iter) { + .print.evaluation(final_feval, env$showsd, iteration) + } + } ) } -# Check whether all callback functions with names given by 'query_names' are present in the 'cb_list'. 
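A brief usage sketch for the `xgb.cb.print.evaluation()` constructor defined above; the dataset and parameter choices are illustrative only, and `verbose = 0` is used on the assumption that it keeps the fitting function from adding its own default print callback:

```r
library(xgboost)

data(mtcars)
dtrain <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)

# Prints train-rmse on the first round, every 5th round thereafter,
# and on the final round.
model <- xgb.train(
  data = dtrain,
  params = list(objective = "reg:squarederror", nthread = 1),
  nrounds = 20,
  watchlist = list(train = dtrain),
  verbose = 0,  # assumed to suppress the automatically-added print callback
  callbacks = list(xgb.cb.print.evaluation(period = 5))
)
```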
-has.callbacks <- function(cb_list, query_names) { - if (length(cb_list) < length(query_names)) - return(FALSE) - if (!is.list(cb_list) || - any(sapply(cb_list, class) != 'function')) { - stop('`cb_list` must be a list of callback functions') - } - cb_names <- callback.names(cb_list) - if (!is.character(cb_names) || - length(cb_names) != length(cb_list) || - any(cb_names == "")) { - stop('All callbacks in the `cb_list` must have a non-empty `name` attribute') - } - if (!is.character(query_names) || - length(query_names) == 0 || - any(query_names == "")) { - stop('query_names must be a non-empty vector of non-empty character names') - } - return(all(query_names %in% cb_names)) +#' @title Callback for logging the evaluation history +#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}. +#' @details This callback creates a table with per-iteration evaluation metrics (see parameters +#' `watchlist` and `feval` in \link{xgb.train}). +#' @details +#' Note: in the column names of the final data.table, the dash '-' character is replaced with +#' the underscore '_' in order to make the column names more like regular R identifiers. +#' @seealso \link{xgb.cb.print.evaluation} +#' @export +xgb.cb.evaluation.log <- function() { + xgb.Callback( + cb_name = "evaluation_log", + f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) { + env$evaluation_log <- vector("list", end_iteration - begin_iteration + 1) + env$next_log <- 1 + }, + f_before_iter = NULL, + f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) { + tmp <- .summarize.feval(iter_feval, TRUE) + env$evaluation_log[[env$next_log]] <- list(iter = iteration, metrics = tmp$feval, sds = tmp$stdev) + env$next_log <- env$next_log + 1 + return(FALSE) + }, + f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) { + if (!NROW(env$evaluation_log)) { + return(prev_cb_res) + } + # in case of early stopping + if (env$next_log <= length(env$evaluation_log)) { + env$evaluation_log <- head(env$evaluation_log, env$next_log - 1) + } + + iters <- data.frame(iter = sapply(env$evaluation_log, function(x) x$iter)) + metrics <- do.call(rbind, lapply(env$evaluation_log, function(x) x$metrics)) + mnames <- gsub("-", "_", names(env$evaluation_log[[1]]$metrics), fixed = TRUE) + colnames(metrics) <- mnames + has_sds <- !is.null(env$evaluation_log[[1]]$sds) + if (has_sds) { + sds <- do.call(rbind, lapply(env$evaluation_log, function(x) x$sds)) + colnames(sds) <- mnames + metrics <- lapply( + mnames, + function(metric) { + out <- cbind(metrics[, metric], sds[, metric]) + colnames(out) <- paste0(metric, c("_mean", "_std")) + return(out) + } + ) + metrics <- do.call(cbind, metrics) + } + evaluation_log <- cbind(iters, metrics) + + if (!is.null(prev_cb_res)) { + if (!is.data.table(prev_cb_res)) { + prev_cb_res <- data.table::as.data.table(prev_cb_res) + } + prev_take <- prev_cb_res[prev_cb_res$iter < min(evaluation_log$iter)] + if (nrow(prev_take)) { + evaluation_log <- rbind(prev_cb_res, evaluation_log) + } + } + evaluation_log <- data.table::as.data.table(evaluation_log) + return(evaluation_log) + } + ) +} + +#' @title Callback for resetting the booster's parameters at each iteration. +#' @param new_params a list where each element corresponds to a parameter that needs to be reset. 
+#' Each element's value must be either a vector of values of length \code{nrounds}
+#' to be set at each iteration,
+#' or a function of two parameters \code{learning_rates(iteration, nrounds)}
+#' which returns a new parameter value by using the current iteration number
+#' and the total number of boosting rounds.
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+#' @details
+#' Note that when training is resumed from some previous model, and a function is used to
+#' reset a parameter value, the \code{nrounds} argument in this function would be the
+#' number of boosting rounds in the current training.
+#'
+#' Does not leave any attribute in the booster.
+#' @export
+xgb.cb.reset.parameters <- function(new_params) {
+  stopifnot(is.list(new_params))
+  pnames <- gsub(".", "_", names(new_params), fixed = TRUE)
+  not_allowed <- pnames %in%
+    c('num_class', 'num_output_group', 'size_leaf_vector', 'updater_seq')
+  if (any(not_allowed))
+    stop('Parameters ', paste(pnames[not_allowed]), " cannot be changed during boosting.")
+
+  xgb.Callback(
+    cb_name = "reset_parameters",
+    env = as.environment(list(new_params = new_params)),
+    f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+      env$end_iteration <- end_iteration
+
+      pnames <- gsub(".", "_", names(env$new_params), fixed = TRUE)
+      for (n in pnames) {
+        p <- env$new_params[[n]]
+        if (is.function(p)) {
+          if (length(formals(p)) != 2)
+            stop("Parameter '", n, "' is a function but not of two arguments")
+        } else if (is.numeric(p) || is.character(p)) {
+          if (length(p) != env$end_iteration)
+            stop("Length of '", n, "' has to be equal to 'nrounds'")
+        } else {
+          stop("Parameter '", n, "' is not a function or a vector")
+        }
+      }
+    },
+    f_before_iter = function(env, model, data, watchlist, iteration) {
+      pars <- lapply(env$new_params, function(p) {
+        if (is.function(p)) {
+          return(p(iteration, env$end_iteration))
+        } else {
+          return(p[iteration])
+        }
+      })
+
+      if (inherits(model, "xgb.Booster")) {
+        xgb.parameters(model) <- pars
+      } else {
+        for (fd in model) {
+          xgb.parameters(fd$bst) <- pars
+        }
+      }
+      return(FALSE)
+    },
+    f_after_iter = NULL,
+    f_after_training = NULL
+  )
+}
+
+#' @title Callback to activate early stopping
+#' @param stopping_rounds The number of rounds with no improvement in
+#' the evaluation metric in order to stop the training.
+#' @param maximize Whether to maximize the evaluation metric.
+#' @param metric_name The name of an evaluation column to use as a criterion for early
+#' stopping. If not set, the last column would be used.
+#' Let's say the test data in \code{watchlist} was labelled as \code{dtest},
+#' and one wants to use the AUC in test data for early stopping regardless of where
+#' it is in the \code{watchlist}, then one of the following would need to be set:
+#' \code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
+#' All dash '-' characters in metric names are considered equivalent to '_'.
+#' @param verbose Whether to print the early stopping information.
+#' @param keep_all_iter Whether to keep all of the boosting rounds that were produced
+#' in the resulting object. If passing `FALSE`, will only keep the boosting rounds
+#' up to the detected best iteration, discarding the ones that come after.
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+#' @description
+#' This callback function determines the condition for early stopping.
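Before the early-stopping details below, a brief usage sketch for the `xgb.cb.reset.parameters()` constructor just defined; the decay schedule here is an arbitrary illustration of the two-argument function form:

```r
library(xgboost)

data(mtcars)
dtrain <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)

# Linear decay of the learning rate from 0.3 down to 0.05 across rounds
eta_schedule <- function(iteration, nrounds) {
  0.3 - (0.3 - 0.05) * (iteration - 1) / (nrounds - 1)
}

model <- xgb.train(
  data = dtrain,
  params = list(objective = "reg:squarederror", eta = 0.3, nthread = 1),
  nrounds = 20,
  callbacks = list(xgb.cb.reset.parameters(list(eta = eta_schedule)))
)
```

A vector of length `nrounds` (e.g. `seq(0.3, 0.05, length.out = 20)`) would work equally well in place of the function.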
+#' +#' The following attributes are assigned to the booster's object: +#' \itemize{ +#' \item \code{best_score} the evaluation score at the best iteration +#' \item \code{best_iteration} at which boosting iteration the best score has occurred +#' (0-based index for interoperability of binary models) +#' } +#' +#' The same values are also stored as R attributes as a result of the callback, plus an additional +#' attribute `stopped_by_max_rounds` which indicates whether an early stopping by the `stopping_rounds` +#' condition occurred. Note that the `best_iteration` that is stored under R attributes will follow +#' base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed +#' through \link{xgb.attr} or \link{xgb.attributes}. +#' +#' At least one data element is required in the evaluation watchlist for early stopping to work. +#' @export +xgb.cb.early.stop <- function( + stopping_rounds, + maximize = FALSE, + metric_name = NULL, + verbose = TRUE, + keep_all_iter = TRUE +) { + if (!is.null(metric_name)) { + stopifnot(is.character(metric_name)) + stopifnot(length(metric_name) == 1L) + } + + xgb.Callback( + cb_name = "early_stop", + env = as.environment( + list( + checked_evnames = FALSE, + stopping_rounds = stopping_rounds, + maximize = maximize, + metric_name = metric_name, + verbose = verbose, + keep_all_iter = keep_all_iter, + stopped_by_max_rounds = FALSE + ) + ), + f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) { + if (inherits(model, "xgb.Booster") && !length(watchlist)) { + stop("For early stopping, watchlist must have at least one element") + } + env$begin_iteration <- begin_iteration + return(NULL) + }, + f_before_iter = function(env, model, data, watchlist, iteration) NULL, + f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) { + sds <- NULL + if (NCOL(iter_feval) > 1) { + tmp <- .summarize.feval(iter_feval, TRUE) + iter_feval <- tmp$feval + sds <- tmp$stdev + } + + if (!env$checked_evnames) { + + eval_names <- gsub('-', '_', names(iter_feval), fixed = TRUE) + if (!is.null(env$metric_name)) { + env$metric_idx <- which(gsub('-', '_', env$metric_name, fixed = TRUE) == eval_names) + if (length(env$metric_idx) == 0) + stop("'metric_name' for early stopping is not one of the following:\n", + paste(eval_names, collapse = ' '), '\n') + } + + if (is.null(env$metric_name)) { + if (NROW(iter_feval) == 1) { + env$metric_idx <- 1L + } else { + env$metric_idx <- length(eval_names) + if (env$verbose) + cat('Multiple eval metrics are present. 
Will use ', + eval_names[env$metric_idx], ' for early stopping.\n', sep = '') + } + } + + env$metric_name <- eval_names[env$metric_idx] + + # maximize is usually NULL when not set in xgb.train and built-in metrics + if (is.null(env$maximize)) + env$maximize <- grepl('(_auc|_aupr|_map|_ndcg|_pre)', env$metric_name) + + if (env$verbose) + cat("Will train until ", env$metric_name, " hasn't improved in ", + env$stopping_rounds, " rounds.\n\n", sep = '') + + env$best_iteration <- env$begin_iteration + if (env$maximize) { + env$best_score <- -Inf + } else { + env$best_score <- Inf + } + + if (inherits(model, "xgb.Booster")) { + best_score <- xgb.attr(model, 'best_score') + if (NROW(best_score)) env$best_score <- as.numeric(best_score) + best_iteration <- xgb.attr(model, 'best_iteration') + if (NROW(best_iteration)) env$best_iteration <- as.numeric(best_iteration) + 1 + } + + env$checked_evnames <- TRUE + } + + score <- iter_feval[env$metric_idx] + if ((env$maximize && score > env$best_score) || + (!env$maximize && score < env$best_score)) { + + env$best_score <- score + env$best_iteration <- iteration + # save the property to attributes, so they will occur in checkpoint + if (inherits(model, "xgb.Booster")) { + xgb.attributes(model) <- list( + best_iteration = env$best_iteration - 1, # convert to 0-based index + best_score = env$best_score + ) + } + } else if (iteration - env$best_iteration >= env$stopping_rounds) { + if (env$verbose) { + best_msg <- .format_eval_string(iteration, iter_feval, sds) + cat("Stopping. Best iteration:\n", best_msg, "\n\n", sep = '') + } + env$stopped_by_max_rounds <- TRUE + return(TRUE) + } + return(FALSE) + }, + f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) { + if (inherits(model, "xgb.Booster") && !env$keep_all_iter && env$best_iteration < iteration) { + # Note: it loses the attributes after being sliced, + # so they have to be re-assigned afterwards. + prev_attr <- xgb.attributes(model) + if (NROW(prev_attr)) { + suppressWarnings({ + prev_attr <- within(prev_attr, rm("best_score", "best_iteration")) + }) + } + .Call(XGBoosterSliceAndReplace_R, xgb.get.handle(model), 0L, env$best_iteration, 1L) + if (NROW(prev_attr)) { + xgb.attributes(model) <- prev_attr + } + } + attrs_set <- list(best_iteration = env$best_iteration - 1, best_score = env$best_score) + if (inherits(model, "xgb.Booster")) { + xgb.attributes(model) <- attrs_set + } else { + for (fd in model) { + xgb.attributes(fd$bst) <- attrs_set # to use in the cv.predict callback + } + } + return( + list( + best_iteration = env$best_iteration, + best_score = env$best_score, + stopped_by_max_rounds = env$stopped_by_max_rounds + ) + ) + } + ) +} + +.save.model.w.formatted.name <- function(model, save_name, iteration) { + # Note: this throws a warning if the name doesn't have anything to format through 'sprintf' + suppressWarnings({ + save_name <- sprintf(save_name, iteration) + }) + xgb.save(model, save_name) +} + +#' @title Callback for saving a model file. +#' @param save_period Save the model to disk after every +#' \code{save_period} iterations; 0 means save the model at the end. +#' @param save_name The name or path for the saved model file. +#' It can contain a \code{\link[base]{sprintf}} formatting specifier +#' to include the integer iteration number in the file name. +#' E.g., with \code{save_name} = 'xgboost_%04d.model', +#' the file saved at iteration 50 would be named "xgboost_0050.model". 
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train},
+#' but \bold{not} to \link{xgb.cv}.
+#' @description
+#' This callback function allows saving an xgb-model file, either periodically
+#' after every \code{save_period} iterations, or at the end of training.
+#'
+#' Does not leave any attribute in the booster.
+#' @export
+xgb.cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
+  if (save_period < 0) {
+    stop("'save_period' cannot be negative")
+  }
+  if (!is.character(save_name) || length(save_name) != 1L) {
+    stop("'save_name' must be a single character referring to the file name.")
+  }
+
+  xgb.Callback(
+    cb_name = "save_model",
+    env = as.environment(list(save_period = save_period, save_name = save_name, last_save = 0)),
+    f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+      env$begin_iteration <- begin_iteration
+    },
+    f_before_iter = NULL,
+    f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+      if (env$save_period > 0 && (iteration - env$begin_iteration) %% env$save_period == 0) {
+        .save.model.w.formatted.name(model, env$save_name, iteration)
+        env$last_save <- iteration
+      }
+      return(FALSE)
+    },
+    f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+      if (env$save_period == 0 && iteration > env$last_save) {
+        .save.model.w.formatted.name(model, env$save_name, iteration)
+      }
+    }
+  )
+}
+
+#' @title Callback for returning cross-validation based predictions.
+#' @param save_models A flag for whether to save the folds' models.
+#' @param outputmargin Whether to save margin predictions (same effect as passing this
+#' parameter to \link{predict.xgb.Booster}).
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.cv},
+#' but \bold{not} to \link{xgb.train}.
+#' @description
+#' This callback function saves predictions for all of the test folds,
+#' and also allows saving the folds' models.
+#' @details
+#' Predictions are saved inside of the \code{pred} element, which is either a vector or a matrix,
+#' depending on the number of prediction outputs per data row. The order of predictions corresponds
+#' to the order of rows in the original dataset. Note that when a custom \code{folds} list is
+#' provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a
+#' non-overlapping list of k sets of indices, as in a standard k-fold CV. The predictions would not be
+#' meaningful when user-provided folds have overlapping indices as in, e.g., random sampling splits.
+#' When some of the indices in the training dataset are not included in the user-provided \code{folds},
+#' their prediction value would be \code{NA}. 
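+#' @examples
+#' # A minimal sketch of collecting out-of-fold predictions through this callback;
+#' # note that passing 'prediction = TRUE' to xgb.cv would add an equivalent
+#' # callback automatically.
+#' data(agaricus.train, package = "xgboost")
+#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
+#' res <- xgb.cv(
+#'   params = list(objective = "binary:logistic", nthread = 2),
+#'   data = dtrain, nrounds = 3, nfold = 3,
+#'   callbacks = list(xgb.cb.cv.predict(save_models = TRUE))
+#' )
+#' str(res$cv_predict$pred)       # test-fold predictions, in the original row order
+#' length(res$cv_predict$models)  # one booster per fold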
+#' @export
+xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
+  xgb.Callback(
+    cb_name = "cv_predict",
+    env = as.environment(list(save_models = save_models, outputmargin = outputmargin)),
+    f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
+      if (inherits(model, "xgb.Booster")) {
+        stop("'cv.predict' callback is only for 'xgb.cv'.")
+      }
+    },
+    f_before_iter = NULL,
+    f_after_iter = NULL,
+    f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
+      pred <- NULL
+      for (fd in model) {
+        pr <- predict(
+          fd$bst,
+          fd$watchlist[[2L]],
+          outputmargin = env$outputmargin,
+          reshape = TRUE
+        )
+        if (is.null(pred)) {
+          if (NCOL(pr) > 1L) {
+            pred <- matrix(NA_real_, nrow(data), ncol(pr))
+          } else {
+            pred <- matrix(NA_real_, nrow(data))
+          }
+        }
+        if (is.matrix(pred)) {
+          pred[fd$index, ] <- pr
+        } else {
+          pred[fd$index] <- pr
+        }
+      }
+      out <- list(pred = pred)
+      if (env$save_models) {
+        out$models <- lapply(model, function(fd) fd$bst)
+      }
+      return(out)
+    }
+  )
+}
+
+.list2mat <- function(coef_list, sparse) {
+  if (sparse) {
+    coef_mat <- methods::new("dgRMatrix")
+    coef_mat@p <- as.integer(c(0, cumsum(sapply(coef_list, function(x) length(x@x)))))
+    coef_mat@j <- as.integer(unlist(lapply(coef_list, slot, "i")) - 1L)
+    coef_mat@x <- unlist(lapply(coef_list, slot, "x"))
+    coef_mat@Dim <- as.integer(c(length(coef_list), length(coef_list[[1L]])))
+    # Note: function 'xgb.gblinear.history' might later on try to slice by columns
+    coef_mat <- methods::as(coef_mat, "CsparseMatrix")
+    return(coef_mat)
+  } else {
+    return(unname(do.call(rbind, coef_list)))
+  }
+}
+
+.extract.coef <- function(model, sparse) {
+  coefs <- .internal.coef.xgb.Booster(model, add_names = FALSE)
+  if (NCOL(coefs) > 1L) {
+    coefs <- as.vector(coefs)
+  }
+  if (sparse) {
+    coefs <- methods::as(coefs, "sparseVector")
+  }
+  return(coefs)
+}
+
+#' @title Callback for collecting coefficients history of a gblinear booster
+#' @param sparse Whether to use a sparse matrix (`TRUE`) or a dense matrix (`FALSE`)
+#' to store the result.
+#' Sparse format is useful when one expects only a subset of coefficients to be non-zero,
+#' e.g. when using the "thrifty" feature selector with a fairly small number of top features
+#' selected per iteration.
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+#' @details
+#' To keep things fast and simple, the gblinear booster does not internally store the history of linear
+#' model coefficients at each boosting iteration. This callback provides a workaround for storing
+#' the coefficients' path, by extracting them after each training iteration.
+#'
+#' This callback will construct a matrix where rows are boosting iterations and columns are
+#' feature coefficients (same order as when calling \link{coef.xgb.Booster}, with the intercept
+#' corresponding to the first column).
+#'
+#' When there is more than one coefficient per feature (e.g. multi-class classification),
+#' the result will be reshaped into a vector where coefficients are arranged first by features and
+#' then by class (e.g. first 1 through N coefficients will be for the first class, then
+#' coefficients N+1 through 2N for the second class, and so on). 
+#'
+#' If the result has only one coefficient per feature in the data, then the resulting matrix
+#' will have column names matching the feature names, otherwise (when there's more than
+#' one coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'
+#' (so e.g. column 'c1' for class '0' will be named 'c1:0').
+#'
+#' With \code{xgb.train}, the output is either a dense or a sparse matrix.
+#' With \code{xgb.cv}, it is a list (one element per fold) of such
+#' matrices.
+#'
+#' The \link{xgb.gblinear.history} function provides an easy way to retrieve the
+#' outputs from this callback.
+#' @seealso \link{xgb.gblinear.history}, \link{coef.xgb.Booster}.
+#' @examples
+#' #### Binary classification:
+#'
+#' ## Keep the number of threads to 1 for examples
+#' nthread <- 1
+#' data.table::setDTthreads(nthread)
+#'
+#' # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
+#' # without considering the 2nd order interactions:
+#' x <- model.matrix(Species ~ .^2, iris)[,-1]
+#' colnames(x)
+#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
+#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
+#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
+#' # For 'shotgun', which is the default linear updater, using high eta values may result in
+#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
+#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
+#' # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
+#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 1.,
+#'                  callbacks = list(xgb.cb.gblinear.history()))
+#' # Extract the coefficients' path and plot them vs boosting iteration number:
+#' coef_path <- xgb.gblinear.history(bst)
+#' matplot(coef_path, type = 'l')
+#'
+#' # With the deterministic coordinate descent updater, it is safer to use higher learning rates.
+#' # Will try the classical componentwise boosting which selects a single best feature per round:
+#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8,
+#'                  updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
+#'                  callbacks = list(xgb.cb.gblinear.history()))
+#' matplot(xgb.gblinear.history(bst), type = 'l')
+#' # Componentwise boosting is known to have a similar effect to Lasso regularization.
+#' # Try experimenting with various values of top_k, eta, nrounds,
+#' # as well as different feature_selectors. 
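+#'
+#' # As a quick sanity check (a sketch): given the column order documented above,
+#' # the last row of the extracted history should match the final model's
+#' # coefficients as reported by coef(), with the intercept in the first position:
+#' coef_path <- xgb.gblinear.history(bst)
+#' all.equal(unname(coef_path[nrow(coef_path), ]), unname(coef(bst)))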
+#' +#' # For xgb.cv: +#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8, +#' callbacks = list(xgb.cb.gblinear.history())) +#' # coefficients in the CV fold #3 +#' matplot(xgb.gblinear.history(bst)[[3]], type = 'l') +#' +#' +#' #### Multiclass classification: +#' # +#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread) +#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3, +#' lambda = 0.0003, alpha = 0.0003, nthread = nthread) +#' # For the default linear updater 'shotgun' it sometimes is helpful +#' # to use smaller eta to reduce instability +#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5, +#' callbacks = list(xgb.cb.gblinear.history())) +#' # Will plot the coefficient paths separately for each class: +#' matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l') +#' matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l') +#' matplot(xgb.gblinear.history(bst, class_index = 2), type = 'l') +#' +#' # CV: +#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5, +#' callbacks = list(xgb.cb.gblinear.history(FALSE))) +#' # 1st fold of 1st class +#' matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l') +#' +#' @export +xgb.cb.gblinear.history <- function(sparse = FALSE) { + xgb.Callback( + cb_name = "gblinear_history", + env = as.environment(list(sparse = sparse)), + f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) { + if (!inherits(model, "xgb.Booster")) { + model <- model[[1L]]$bst + } + if (xgb.booster_type(model) != "gblinear") { + stop("Callback 'xgb.cb.gblinear.history' is only for booster='gblinear'.") + } + env$coef_hist <- vector("list", end_iteration - begin_iteration + 1) + env$next_idx <- 1 + }, + f_before_iter = NULL, + f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) { + if (inherits(model, "xgb.Booster")) { + coef_this <- .extract.coef(model, env$sparse) + } else { + coef_this <- lapply(model, function(fd) .extract.coef(fd$bst, env$sparse)) + } + env$coef_hist[[env$next_idx]] <- coef_this + env$next_idx <- env$next_idx + 1 + return(FALSE) + }, + f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) { + # in case of early stopping + if (env$next_idx <= length(env$coef_hist)) { + env$coef_hist <- head(env$coef_hist, env$next_idx - 1) + } + + is_booster <- inherits(model, "xgb.Booster") + if (is_booster) { + out <- .list2mat(env$coef_hist, env$sparse) + } else { + out <- lapply( + X = lapply( + X = seq_along(env$coef_hist[[1]]), + FUN = function(i) lapply(env$coef_hist, "[[", i) + ), + FUN = .list2mat, + env$sparse + ) + } + if (!is.null(prev_cb_res)) { + if (is_booster) { + out <- rbind(prev_cb_res, out) + } else { + # Note: this case should never be encountered, since training cannot + # be continued from the result of xgb.cv, but this code should in + # theory do the job if the situation were to be encountered. 
+          out <- lapply(
+            out,
+            function(lst) {
+              lapply(
+                seq_along(lst),
+                function(i) rbind(prev_cb_res[[i]], lst[[i]])
+              )
+            }
+          )
+        }
+      }
+      feature_names <- getinfo(data, "feature_name")
+      if (!NROW(feature_names)) {
+        feature_names <- paste0("V", seq(1L, ncol(data)))
+      }
+      expected_ncols <- length(feature_names) + 1
+      if (is_booster) {
+        mat_ncols <- ncol(out)
+      } else {
+        mat_ncols <- ncol(out[[1L]])
+      }
+      if (mat_ncols %% expected_ncols == 0) {
+        feature_names <- c("(Intercept)", feature_names)
+        n_rep <- mat_ncols / expected_ncols
+        if (n_rep > 1) {
+          feature_names <- unlist(
+            lapply(
+              seq(1, n_rep),
+              function(cl) paste(feature_names, cl - 1, sep = ":")
+            )
+          )
+        }
+        if (is_booster) {
+          colnames(out) <- feature_names
+        } else {
+          out <- lapply(
+            out,
+            function(mat) {
+              colnames(mat) <- feature_names
+              return(mat)
+            }
+          )
+        }
+      }
+      return(out)
+    }
+  )
+}
+
+#' @title Extract gblinear coefficients history.
+#' @description A helper function to extract the matrix of linear coefficients' history
+#' from a gblinear model created while using the \link{xgb.cb.gblinear.history}
+#' callback (which must be added manually, since it is not used by default).
+#' @details Note that this is an R-specific function that relies on R attributes that
+#' are not saved when using xgboost's own serialization functions like \link{xgb.load}
+#' or \link{xgb.load.raw}.
+#'
+#' In order for a serialized model to be accepted by this function, one must use R
+#' serializers such as \link{saveRDS}.
+#' @param model either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
+#' using the \link{xgb.cb.gblinear.history} callback, but \bold{not} a booster
+#' loaded from \link{xgb.load} or \link{xgb.load.raw}.
+#' @param class_index zero-based class index to extract the coefficients for only that
+#' specific class in a multinomial multiclass model. When it is NULL, all the
+#' coefficients are returned. Has no effect in non-multiclass models.
+#'
+#' @return
+#' For an \link{xgb.train} result, a matrix (either dense or sparse) with columns
+#' corresponding to the model coefficients and rows corresponding to boosting iterations.
+#'
+#' For an \link{xgb.cv} result, a list of such matrices is returned with the elements
+#' corresponding to CV folds.
+#'
+#' When there is more than one coefficient per feature (e.g. multi-class classification)
+#' and `class_index` is not provided,
+#' the result will be reshaped into a vector where coefficients are arranged first by features and
+#' then by class (e.g. first 1 through N coefficients will be for the first class, then
+#' coefficients N+1 through 2N for the second class, and so on).
+#' @seealso \link{xgb.cb.gblinear.history}, \link{coef.xgb.Booster}. 
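+#' @examples
+#' # A minimal sketch of the serialization caveat described above, assuming `bst` is a
+#' # gblinear booster that was trained with the xgb.cb.gblinear.history() callback:
+#' \dontrun{
+#' fname_rds <- file.path(tempdir(), "bst.rds")
+#' saveRDS(bst, fname_rds)
+#' xgb.gblinear.history(readRDS(fname_rds))  # R attributes are kept: this works
+#'
+#' fname_ubj <- file.path(tempdir(), "bst.ubj")
+#' xgb.save(bst, fname_ubj)
+#' xgb.gblinear.history(xgb.load(fname_ubj))  # R attributes are lost: this errors
+#' }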
+#' @export +xgb.gblinear.history <- function(model, class_index = NULL) { + + if (!(inherits(model, "xgb.Booster") || + inherits(model, "xgb.cv.synchronous"))) + stop("model must be an object of either xgb.Booster or xgb.cv.synchronous class") + is_cv <- inherits(model, "xgb.cv.synchronous") + + if (!is_cv) { + coef_path <- getElement(attributes(model), "gblinear_history") + } else { + coef_path <- getElement(model, "gblinear_history") + } + if (is.null(coef_path)) { + stop("model must be trained while using the xgb.cb.gblinear.history() callback") + } + + if (!is_cv) { + num_class <- xgb.num_class(model) + num_feat <- xgb.num_feature(model) + } else { + # in case of CV, the object is expected to have this info + if (model$params$booster != "gblinear") + stop("It does not appear to be a gblinear model") + num_class <- NVL(model$params$num_class, 1) + num_feat <- model$nfeatures + if (is.null(num_feat)) + stop("This xgb.cv result does not have nfeatures info") + } + + if (!is.null(class_index) && + num_class > 1 && + (class_index[1] < 0 || class_index[1] >= num_class)) + stop("class_index has to be within [0,", num_class - 1, "]") + + if (!is.null(class_index) && num_class > 1) { + seq_take <- seq(1 + class_index * (num_feat + 1), (class_index + 1) * (num_feat + 1)) + coef_path <- if (is.list(coef_path)) { + lapply(coef_path, function(x) x[, seq_take]) + } else { + coef_path <- coef_path[, seq_take] + } + } + return(coef_path) +} + +.callbacks.only.train <- "save_model" +.callbacks.only.cv <- "cv_predict" + +.process.callbacks <- function(callbacks, is_cv) { + if (inherits(callbacks, "xgb.Callback")) { + callbacks <- list(callbacks) + } + if (!is.list(callbacks)) { + stop("'callbacks' must be a list.") + } + cb_names <- character() + if (length(callbacks)) { + is_callback <- sapply(callbacks, inherits, "xgb.Callback") + if (!all(is_callback)) { + stop("Entries in 'callbacks' must be 'xgb.Callback' objects.") + } + cb_names <- sapply(callbacks, function(cb) cb$cb_name) + if (length(cb_names) != length(callbacks)) { + stop("Passed invalid callback(s).") + } + if (anyDuplicated(cb_names) > 0) { + stop("Callbacks must have unique names.") + } + if (is_cv) { + if (any(.callbacks.only.train %in% cb_names)) { + stop( + "Passed callback(s) not supported for 'xgb.cv': ", + paste(intersect(.callbacks.only.train, cb_names), collapse = ", ") + ) + } + } else { + if (any(.callbacks.only.cv %in% cb_names)) { + stop( + "Passed callback(s) not supported for 'xgb.train': ", + paste(intersect(.callbacks.only.cv, cb_names), collapse = ", ") + ) + } + } + # Early stopping callback needs to be executed before the others + if ("early_stop" %in% cb_names) { + mask <- cb_names == "early_stop" + callbacks <- c(list(callbacks[[which(mask)]]), callbacks[!mask]) + } + } + return(list(callbacks = callbacks, cb_names = cb_names)) +} + +# Note: don't try to use functions like 'append', as they will +# merge the elements of the different callbacks into a single list. 
+add.callback <- function(callbacks, cb, as_first_elt = FALSE) {
+  if (!as_first_elt) {
+    callbacks[[length(callbacks) + 1]] <- cb
+    return(callbacks)
+  } else {
+    if (!length(callbacks)) {
+      return(list(cb))
+    }
+    new_cb <- vector("list", length(callbacks) + 1)
+    new_cb[[1]] <- cb
+    new_cb[seq(2, length(new_cb))] <- callbacks
+    return(new_cb)
+  }
+}
+
+has.callbacks <- function(callbacks, cb_name) {
+  cb_names <- sapply(callbacks, function(cb) cb$cb_name)
+  return(cb_name %in% cb_names)
 }
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index e8ae787fc..723310ee4 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -142,7 +142,7 @@ check.custom.eval <- function(env = parent.frame()) {
   if (!is.null(env$feval) &&
       is.null(env$maximize) && (
         !is.null(env$early_stopping_rounds) ||
-          has.callbacks(env$callbacks, 'cb.early.stop')))
+          has.callbacks(env$callbacks, "early_stop")))
     stop("Please set 'maximize' to indicate whether the evaluation metric needs to be maximized or not")
 }
diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R
index 8a5d66198..77d75fa9c 100644
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@@ -1071,6 +1071,10 @@ xgb.best_iteration <- function(bst) {
 #' coef(model)
 #' @export
 coef.xgb.Booster <- function(object, ...) {
+  return(.internal.coef.xgb.Booster(object, add_names = TRUE))
+}
+
+.internal.coef.xgb.Booster <- function(object, add_names = TRUE) {
   booster_type <- xgb.booster_type(object)
   if (booster_type != "gblinear") {
     stop("Coefficients are not defined for Booster type ", booster_type)
@@ -1089,21 +1093,27 @@ coef.xgb.Booster <- function(object, ...) {
   intercepts <- weights[seq(sep + 1, length(weights))]
   intercepts <- intercepts + as.numeric(base_score)
-  feature_names <- xgb.feature_names(object)
-  if (!NROW(feature_names)) {
-    # This mimics the default naming in R which names columns as "V1..N"
-    # when names are needed but not available
-    feature_names <- paste0("V", seq(1L, num_feature))
+  if (add_names) {
+    feature_names <- xgb.feature_names(object)
+    if (!NROW(feature_names)) {
+      # This mimics the default naming in R which names columns as "V1..N"
+      # when names are needed but not available
+      feature_names <- paste0("V", seq(1L, num_feature))
+    }
+    feature_names <- c("(Intercept)", feature_names)
   }
-  feature_names <- c("(Intercept)", feature_names)
   if (n_cols == 1L) {
     out <- c(intercepts, coefs)
-    names(out) <- feature_names
+    if (add_names) {
+      names(out) <- feature_names
+    }
   } else {
     coefs <- matrix(coefs, nrow = num_feature, byrow = TRUE)
     dim(intercepts) <- c(1L, n_cols)
     out <- rbind(intercepts, coefs)
-    row.names(out) <- feature_names
+    if (add_names) {
+      row.names(out) <- feature_names
+    }
     # TODO: if a class names attributes is added,
     # should use those names here.
   }
@@ -1255,12 +1265,9 @@ print.xgb.Booster <- function(x, ...) {
     cat("  ", paste(attr_names, collapse = ", "), "\n")
   }
-  if (!is.null(R_attrs$callbacks) && length(R_attrs$callbacks) > 0) {
-    cat('callbacks:\n')
-    lapply(callback.calls(R_attrs$callbacks), function(x) {
-      cat('  ')
-      print(x)
-    })
+  additional_attr <- setdiff(names(R_attrs), .reserved_cb_names)
+  if (NROW(additional_attr)) {
+    cat("callbacks:\n  ", paste(additional_attr, collapse = ", "), "\n")
   }
   if (!is.null(R_attrs$evaluation_log)) {
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index 29bddb57f..23ca0f2de 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -27,7 +27,7 @@
 #' that NA values should be considered as 'missing' by the algorithm. 
#' Sometimes, 0 or other extreme value might be used to represent missing values. #' @param prediction A logical value indicating whether to return the test fold predictions -#' from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback. +#' from each CV model. This parameter engages the \code{\link{xgb.cb.cv.predict}} callback. #' @param showsd \code{boolean}, whether to show standard deviation of cross validation #' @param metrics, list of evaluation metrics to be used in cross validation, #' when it is not specified, the evaluation metric is chosen according to objective function. @@ -57,17 +57,17 @@ #' @param verbose \code{boolean}, print the statistics during the process #' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}. #' Default is 1 which means all messages are printed. This parameter is passed to the -#' \code{\link{cb.print.evaluation}} callback. +#' \code{\link{xgb.cb.print.evaluation}} callback. #' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered. #' If set to an integer \code{k}, training with a validation set will stop if the performance #' doesn't improve for \code{k} rounds. -#' Setting this parameter engages the \code{\link{cb.early.stop}} callback. +#' Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback. #' @param maximize If \code{feval} and \code{early_stopping_rounds} are set, #' then this parameter must be set as well. #' When it is \code{TRUE}, it means the larger the evaluation score the better. -#' This parameter is passed to the \code{\link{cb.early.stop}} callback. +#' This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback. #' @param callbacks a list of callback functions to perform various task during boosting. -#' See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the +#' See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the #' parameters' values. User can provide either existing or their own callback methods in order #' to customize the training process. #' @param ... other parameters to pass to \code{params}. @@ -90,25 +90,25 @@ #' \itemize{ #' \item \code{call} a function call. #' \item \code{params} parameters that were passed to the xgboost library. Note that it does not -#' capture parameters changed by the \code{\link{cb.reset.parameters}} callback. -#' \item \code{callbacks} callback functions that were either automatically assigned or -#' explicitly passed. +#' capture parameters changed by the \code{\link{xgb.cb.reset.parameters}} callback. #' \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the #' first column corresponding to iteration number and the rest corresponding to the #' CV-based evaluation means and standard deviations for the training and test CV-sets. -#' It is created by the \code{\link{cb.evaluation.log}} callback. +#' It is created by the \code{\link{xgb.cb.evaluation.log}} callback. #' \item \code{niter} number of boosting iterations. #' \item \code{nfeatures} number of features in training data. #' \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds} #' parameter or randomly generated. #' \item \code{best_iteration} iteration number with the best evaluation metric value #' (only available with early stopping). -#' \item \code{pred} CV prediction values available when \code{prediction} is set. 
-#' It is either vector or matrix (see \code{\link{cb.cv.predict}}).
-#' \item \code{models} a list of the CV folds' models. It is only available with the explicit
-#' setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
 #' }
 #'
+#' Plus other potential elements that are the result of callbacks, such as a list `cv_predict` with
+#' a sub-element `pred` when passing `prediction = TRUE`, which is added by the \link{xgb.cb.cv.predict}
+#' callback (note that one can also pass it manually under `callbacks` with different settings,
+#' such as also saving the models created during cross validation); or a list `early_stop` which
+#' will contain elements such as `best_iteration` when using the early stopping callback (\link{xgb.cb.early.stop}).
+#'
 #' @examples
 #' data(agaricus.train, package='xgboost')
 #' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
@@ -160,32 +160,38 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
     folds <- generate.cv.folds(nfold, nrow(data), stratified, cv_label, params)
   }
+  # Callbacks
+  tmp <- .process.callbacks(callbacks, is_cv = TRUE)
+  callbacks <- tmp$callbacks
+  cb_names <- tmp$cb_names
+  rm(tmp)
+
+  # Early stopping callback
+  if (!is.null(early_stopping_rounds) && !("early_stop" %in% cb_names)) {
+    callbacks <- add.callback(
+      callbacks,
+      xgb.cb.early.stop(
+        early_stopping_rounds,
+        maximize = maximize,
+        verbose = verbose
+      ),
+      as_first_elt = TRUE
+    )
+  }
   # verbosity & evaluation printing callback:
   params <- c(params, list(silent = 1))
   print_every_n <- max(as.integer(print_every_n), 1L)
-  if (!has.callbacks(callbacks, 'cb.print.evaluation') && verbose) {
-    callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n, showsd = showsd))
+  if (verbose && !("print_evaluation" %in% cb_names)) {
+    callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n, showsd = showsd))
   }
   # evaluation log callback: always is on in CV
-  evaluation_log <- list()
-  if (!has.callbacks(callbacks, 'cb.evaluation.log')) {
-    callbacks <- add.cb(callbacks, cb.evaluation.log())
-  }
-  # Early stopping callback
-  stop_condition <- FALSE
-  if (!is.null(early_stopping_rounds) &&
-      !has.callbacks(callbacks, 'cb.early.stop')) {
-    callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
-                                                 maximize = maximize, verbose = verbose))
+  if (!("evaluation_log" %in% cb_names)) {
+    callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
   }
   # CV-predictions callback
-  if (prediction &&
-      !has.callbacks(callbacks, 'cb.cv.predict')) {
-    callbacks <- add.cb(callbacks, cb.cv.predict(save_models = FALSE))
+  if (prediction && !("cv_predict" %in% cb_names)) {
+    callbacks <- add.callback(callbacks, xgb.cb.cv.predict(save_models = FALSE))
   }
-  # Sort the callbacks into categories
-  cb <- categorize.callbacks(callbacks)
-
   # create the booster-folds
   # train_folds
@@ -211,9 +217,6 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
     bst <- bst$bst
     list(dtrain = dtrain, bst = bst, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
   })
-  rm(dall)
-  # a "basket" to collect some results from callbacks
-  basket <- list()
   # extract parameters that can affect the relationship b/w #trees and #iterations
   num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1)  # nolint
@@ -222,10 +225,25 @@
   begin_iteration <- 1
   end_iteration <- nrounds
+  .execute.cb.before.training(
+    callbacks,
+    
bst_folds, + dall, + NULL, + begin_iteration, + end_iteration + ) + # synchronous CV boosting: run CV folds' models within each iteration for (iteration in begin_iteration:end_iteration) { - for (f in cb$pre_iter) f() + .execute.cb.before.iter( + callbacks, + bst_folds, + dall, + NULL, + iteration + ) msg <- lapply(bst_folds, function(fd) { xgb.iter.update( @@ -242,27 +260,36 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing ) }) msg <- simplify2array(msg) - # Note: these variables might look unused here, but they are used in the callbacks - bst_evaluation <- rowMeans(msg) # nolint - bst_evaluation_err <- apply(msg, 1, sd) # nolint - for (f in cb$post_iter) f() + should_stop <- .execute.cb.after.iter( + callbacks, + bst_folds, + dall, + NULL, + iteration, + msg + ) - if (stop_condition) break + if (should_stop) break } - for (f in cb$finalize) f(finalize = TRUE) + cb_outputs <- .execute.cb.after.training( + callbacks, + bst_folds, + dall, + NULL, + iteration, + msg + ) # the CV result ret <- list( call = match.call(), params = params, - callbacks = callbacks, - evaluation_log = evaluation_log, - niter = end_iteration, - nfeatures = ncol(data), + niter = iteration, + nfeatures = ncol(dall), folds = folds ) - ret <- c(ret, basket) + ret <- c(ret, cb_outputs) class(ret) <- 'xgb.cv.synchronous' return(invisible(ret)) @@ -308,23 +335,16 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) { paste0('"', unlist(x$params), '"'), sep = ' = ', collapse = ', '), '\n', sep = '') } - if (!is.null(x$callbacks) && length(x$callbacks) > 0) { - cat('callbacks:\n') - lapply(callback.calls(x$callbacks), function(x) { - cat(' ') - print(x) - }) - } for (n in c('niter', 'best_iteration')) { - if (is.null(x[[n]])) + if (is.null(x$early_stop[[n]])) next - cat(n, ': ', x[[n]], '\n', sep = '') + cat(n, ': ', x$early_stop[[n]], '\n', sep = '') } - if (!is.null(x$pred)) { + if (!is.null(x$cv_predict$pred)) { cat('pred:\n') - str(x$pred) + str(x$cv_predict$pred) } } @@ -332,9 +352,9 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) { cat('evaluation_log:\n') print(x$evaluation_log, row.names = FALSE, ...) - if (!is.null(x$best_iteration)) { + if (!is.null(x$early_stop$best_iteration)) { cat('Best iteration:\n') - print(x$evaluation_log[x$best_iteration], row.names = FALSE, ...) + print(x$evaluation_log[x$early_stop$best_iteration], row.names = FALSE, ...) } invisible(x) } diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R index 4985f74b5..d5b192bcb 100644 --- a/R-package/R/xgb.load.R +++ b/R-package/R/xgb.load.R @@ -6,7 +6,7 @@ #' #' @details #' The input file is expected to contain a model saved in an xgboost model format -#' using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some +#' using either \code{\link{xgb.save}} or \code{\link{xgb.cb.save.model}} in R, or using some #' appropriate methods from other xgboost interfaces. E.g., a model trained in Python and #' saved from there in xgboost format, could be loaded from R. #' diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 44cde2e7a..34c21d552 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -118,7 +118,7 @@ #' Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each #' of these datasets during each boosting iteration, and stored in the end as a field named #' \code{evaluation_log} in the resulting object. 
When either \code{verbose>=1} or
-#' \code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
+#' \code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
 #' printed out during the training.
 #' E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
 #' the performance of each round's model on mat1 and mat2.
@@ -130,31 +130,32 @@
 #' @param verbose If 0, xgboost will stay silent. If 1, it will print information about performance.
 #' If 2, some additional information will be printed out.
 #' Note that setting \code{verbose > 0} automatically engages the
-#' \code{cb.print.evaluation(period=1)} callback function.
+#' \code{xgb.cb.print.evaluation(period=1)} callback function.
 #' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
 #' Default is 1 which means all messages are printed. This parameter is passed to the
-#' \code{\link{cb.print.evaluation}} callback.
+#' \code{\link{xgb.cb.print.evaluation}} callback.
 #' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
 #' If set to an integer \code{k}, training with a validation set will stop if the performance
 #' doesn't improve for \code{k} rounds.
-#' Setting this parameter engages the \code{\link{cb.early.stop}} callback.
+#' Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.
 #' @param maximize If \code{feval} and \code{early_stopping_rounds} are set,
 #' then this parameter must be set as well.
 #' When it is \code{TRUE}, it means the larger the evaluation score the better.
-#' This parameter is passed to the \code{\link{cb.early.stop}} callback.
+#' This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.
 #' @param save_period when it is non-NULL, model is saved to disk after every \code{save_period} rounds,
-#' 0 means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.
+#' 0 means save at the end. The saving is handled by the \code{\link{xgb.cb.save.model}} callback.
 #' @param save_name the name or path for periodically saved model file.
 #' @param xgb_model a previously built model to continue the training from.
 #' Could be either an object of class \code{xgb.Booster}, or its raw data, or the name of a
 #' file with a previously saved model.
 #' @param callbacks a list of callback functions to perform various task during boosting.
-#' See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+#' See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
 #' parameters' values. User can provide either existing or their own callback methods in order
 #' to customize the training process.
 #'
-#' Note that some callbacks might try to set an evaluation log - be aware that these evaluation logs
-#' are kept as R attributes, and thus do not get saved when using non-R serializaters like
+#' Note that some callbacks might try to leave attributes in the resulting model object,
+#' such as an evaluation log (a `data.table` object) - be aware that these objects are kept
+#' as R attributes, and thus do not get saved when using XGBoost's own serializers like
 #' \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
 #' @param ... other parameters to pass to \code{params}.
 #' @param label vector of response values. 
Should not be provided when data is @@ -206,18 +207,19 @@ #' #' The following callbacks are automatically created when certain parameters are set: #' \itemize{ -#' \item \code{cb.print.evaluation} is turned on when \code{verbose > 0}; +#' \item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0}; #' and the \code{print_every_n} parameter is passed to it. -#' \item \code{cb.evaluation.log} is on when \code{watchlist} is present. -#' \item \code{cb.early.stop}: when \code{early_stopping_rounds} is set. -#' \item \code{cb.save.model}: when \code{save_period > 0} is set. +#' \item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present. +#' \item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set. +#' \item \code{xgb.cb.save.model}: when \code{save_period > 0} is set. #' } #' #' Note that objects of type `xgb.Booster` as returned by this function behave a bit differently #' from typical R objects (it's an 'altrep' list class), and it makes a separation between #' internal booster attributes (restricted to jsonifyable data), accessed through \link{xgb.attr} #' and shared between interfaces through serialization functions like \link{xgb.save}; and -#' R-specific attributes, accessed through \link{attributes} and \link{attr}, which are otherwise +#' R-specific attributes (typically the result from a callback), accessed through \link{attributes} +#' and \link{attr}, which are otherwise #' only used in the R interface, only kept when using R's serializers like \link{saveRDS}, and #' not anyhow used by functions like \link{predict.xgb.Booster}. #' @@ -229,7 +231,7 @@ #' effect elsewhere. #' #' @seealso -#' \code{\link{callbacks}}, +#' \code{\link{xgb.Callback}}, #' \code{\link{predict.xgb.Booster}}, #' \code{\link{xgb.cv}} #' @@ -295,7 +297,7 @@ #' objective = "binary:logistic", eval_metric = "auc") #' my_etas <- list(eta = c(0.5, 0.1)) #' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, -#' callbacks = list(cb.reset.parameters(my_etas))) +#' callbacks = list(xgb.cb.reset.parameters(my_etas))) #' #' ## Early stopping: #' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist, @@ -339,47 +341,47 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), params <- c(params, list(eval_metric = m)) } - # evaluation printing callback params <- c(params) - print_every_n <- max(as.integer(print_every_n), 1L) - if (!has.callbacks(callbacks, 'cb.print.evaluation') && - verbose) { - callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n)) - } - # evaluation log callback: it is automatically enabled when watchlist is provided - evaluation_log <- list() - if (!has.callbacks(callbacks, 'cb.evaluation.log') && - length(watchlist) > 0) { - callbacks <- add.cb(callbacks, cb.evaluation.log()) - } - # Model saving callback - if (!is.null(save_period) && - !has.callbacks(callbacks, 'cb.save.model')) { - callbacks <- add.cb(callbacks, cb.save.model(save_period, save_name)) - } - # Early stopping callback - stop_condition <- FALSE - if (!is.null(early_stopping_rounds) && - !has.callbacks(callbacks, 'cb.early.stop')) { - callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds, - maximize = maximize, verbose = verbose)) - } - - # Sort the callbacks into categories - cb <- categorize.callbacks(callbacks) params['validate_parameters'] <- TRUE if (!("seed" %in% names(params))) { params[["seed"]] <- sample(.Machine$integer.max, size = 1) } + # callbacks + tmp <- .process.callbacks(callbacks, is_cv = FALSE) + callbacks <- 
tmp$callbacks + cb_names <- tmp$cb_names + rm(tmp) + + # Early stopping callback (should always come first) + if (!is.null(early_stopping_rounds) && !("early_stop" %in% cb_names)) { + callbacks <- add.callback( + callbacks, + xgb.cb.early.stop( + early_stopping_rounds, + maximize = maximize, + verbose = verbose + ), + as_first_elt = TRUE + ) + } + # evaluation printing callback + print_every_n <- max(as.integer(print_every_n), 1L) + if (verbose && !("print_evaluation" %in% cb_names)) { + callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n)) + } + # evaluation log callback: it is automatically enabled when watchlist is provided + if (length(watchlist) && !("evaluation_log" %in% cb_names)) { + callbacks <- add.callback(callbacks, xgb.cb.evaluation.log()) + } + # Model saving callback + if (!is.null(save_period) && !("save_model" %in% cb_names)) { + callbacks <- add.callback(callbacks, xgb.cb.save.model(save_period, save_name)) + } + # The tree updating process would need slightly different handling is_update <- NVL(params[['process_type']], '.') == 'update' - past_evaluation_log <- NULL - if (inherits(xgb_model, "xgb.Booster")) { - past_evaluation_log <- attributes(xgb_model)$evaluation_log - } - # Construct a booster (either a new one or load from xgb_model) bst <- xgb.Booster( params = params, @@ -394,11 +396,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), dtrain ) - # extract parameters that can affect the relationship b/w #trees and #iterations - # Note: it might look like these aren't used, but they need to be defined in this - # environment for the callbacks for work correctly. - num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint - if (is_update && nrounds > niter_init) stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)") @@ -406,20 +403,36 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), begin_iteration <- niter_skip + 1 end_iteration <- niter_skip + nrounds + .execute.cb.before.training( + callbacks, + bst, + dtrain, + watchlist, + begin_iteration, + end_iteration + ) + # the main loop for boosting iterations for (iteration in begin_iteration:end_iteration) { - for (f in cb$pre_iter) f() - - xgb.iter.update( - bst = bst, - dtrain = dtrain, - iter = iteration - 1, - obj = obj + .execute.cb.before.iter( + callbacks, + bst, + dtrain, + watchlist, + iteration ) + xgb.iter.update( + bst = bst, + dtrain = dtrain, + iter = iteration - 1, + obj = obj + ) + + bst_evaluation <- NULL if (length(watchlist) > 0) { - bst_evaluation <- xgb.iter.eval( # nolint: object_usage_linter + bst_evaluation <- xgb.iter.eval( bst = bst, watchlist = watchlist, iter = iteration - 1, @@ -427,36 +440,46 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), ) } - for (f in cb$post_iter) f() + should_stop <- .execute.cb.after.iter( + callbacks, + bst, + dtrain, + watchlist, + iteration, + bst_evaluation + ) - if (stop_condition) break + if (should_stop) break } - for (f in cb$finalize) f(finalize = TRUE) - # store the evaluation results - keep_evaluation_log <- FALSE - if (length(evaluation_log) > 0 && nrow(evaluation_log) > 0) { - keep_evaluation_log <- TRUE - # include the previous compatible history when available - if (inherits(xgb_model, 'xgb.Booster') && - !is_update && - !is.null(past_evaluation_log) && - isTRUE(all.equal(colnames(evaluation_log), - colnames(past_evaluation_log)))) { - evaluation_log <- rbindlist(list(past_evaluation_log, evaluation_log)) - } - 
} + cb_outputs <- .execute.cb.after.training( + callbacks, + bst, + dtrain, + watchlist, + iteration, + bst_evaluation + ) extra_attrs <- list( call = match.call(), - params = params, - callbacks = callbacks + params = params ) - if (keep_evaluation_log) { - extra_attrs$evaluation_log <- evaluation_log - } + curr_attrs <- attributes(bst) - attributes(bst) <- c(curr_attrs, extra_attrs) + if (NROW(curr_attrs)) { + curr_attrs <- curr_attrs[ + setdiff( + names(curr_attrs), + c(names(extra_attrs), names(cb_outputs)) + ) + ] + } + curr_attrs <- c(extra_attrs, curr_attrs) + if (NROW(cb_outputs)) { + curr_attrs <- c(curr_attrs, cb_outputs) + } + attributes(bst) <- curr_attrs return(bst) } diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index 170aa5ffd..7fecec39c 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -82,12 +82,8 @@ NULL NULL # Various imports -#' @importClassesFrom Matrix dgCMatrix dgeMatrix dgRMatrix -#' @importFrom Matrix colSums +#' @importClassesFrom Matrix dgCMatrix dgRMatrix CsparseMatrix #' @importFrom Matrix sparse.model.matrix -#' @importFrom Matrix sparseVector -#' @importFrom Matrix sparseMatrix -#' @importFrom Matrix t #' @importFrom data.table data.table #' @importFrom data.table is.data.table #' @importFrom data.table as.data.table @@ -103,6 +99,7 @@ NULL #' @importFrom stats coef #' @importFrom stats predict #' @importFrom stats median +#' @importFrom stats sd #' @importFrom stats variable.names #' @importFrom utils head #' @importFrom graphics barplot diff --git a/R-package/man/callbacks.Rd b/R-package/man/callbacks.Rd deleted file mode 100644 index 9f6f69015..000000000 --- a/R-package/man/callbacks.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/callbacks.R -\name{callbacks} -\alias{callbacks} -\title{Callback closures for booster training.} -\description{ -These are used to perform various service tasks either during boosting iterations or at the end. -This approach helps to modularize many of such tasks without bloating the main training methods, -and it offers . -} -\details{ -By default, a callback function is run after each boosting iteration. -An R-attribute \code{is_pre_iteration} could be set for a callback to define a pre-iteration function. - -When a callback function has \code{finalize} parameter, its finalizer part will also be run after -the boosting is completed. - -WARNING: side-effects!!! Be aware that these callback functions access and modify things in -the environment from which they are called from, which is a fairly uncommon thing to do in R. - -To write a custom callback closure, make sure you first understand the main concepts about R environments. -Check either R documentation on \code{\link[base]{environment}} or the -\href{http://adv-r.had.co.nz/Environments.html}{Environments chapter} from the "Advanced R" -book by Hadley Wickham. Further, the best option is to read the code of some of the existing callbacks - -choose ones that do something similar to what you want to achieve. Also, you would need to get familiar -with the objects available inside of the \code{xgb.train} and \code{xgb.cv} internal environments. 
-} -\seealso{ -\code{\link{cb.print.evaluation}}, -\code{\link{cb.evaluation.log}}, -\code{\link{cb.reset.parameters}}, -\code{\link{cb.early.stop}}, -\code{\link{cb.save.model}}, -\code{\link{cb.cv.predict}}, -\code{\link{xgb.train}}, -\code{\link{xgb.cv}} -} diff --git a/R-package/man/cb.early.stop.Rd b/R-package/man/cb.early.stop.Rd deleted file mode 100644 index 7cd51a3ce..000000000 --- a/R-package/man/cb.early.stop.Rd +++ /dev/null @@ -1,62 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/callbacks.R -\name{cb.early.stop} -\alias{cb.early.stop} -\title{Callback closure to activate the early stopping.} -\usage{ -cb.early.stop( - stopping_rounds, - maximize = FALSE, - metric_name = NULL, - verbose = TRUE -) -} -\arguments{ -\item{stopping_rounds}{The number of rounds with no improvement in -the evaluation metric in order to stop the training.} - -\item{maximize}{whether to maximize the evaluation metric} - -\item{metric_name}{the name of an evaluation column to use as a criteria for early -stopping. If not set, the last column would be used. -Let's say the test data in \code{watchlist} was labelled as \code{dtest}, -and one wants to use the AUC in test data for early stopping regardless of where -it is in the \code{watchlist}, then one of the following would need to be set: -\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}. -All dash '-' characters in metric names are considered equivalent to '_'.} - -\item{verbose}{whether to print the early stopping information.} -} -\description{ -Callback closure to activate the early stopping. -} -\details{ -This callback function determines the condition for early stopping -by setting the \code{stop_condition = TRUE} flag in its calling frame. - -The following additional fields are assigned to the model's R object: -\itemize{ -\item \code{best_score} the evaluation score at the best iteration -\item \code{best_iteration} at which boosting iteration the best score has occurred (1-based index) -} -The Same values are also stored as xgb-attributes: -\itemize{ -\item \code{best_iteration} is stored as a 0-based iteration index (for interoperability of binary models) -\item \code{best_msg} message string is also stored. -} - -At least one data element is required in the evaluation watchlist for early stopping to work. - -Callback function expects the following values to be set in its calling frame: -\code{stop_condition}, -\code{bst_evaluation}, -\code{rank}, -\code{bst} (or \code{bst_folds} and \code{basket}), -\code{iteration}, -\code{begin_iteration}, -\code{end_iteration}, -} -\seealso{ -\code{\link{callbacks}}, -\code{\link{xgb.attr}} -} diff --git a/R-package/man/cb.evaluation.log.Rd b/R-package/man/cb.evaluation.log.Rd deleted file mode 100644 index 94f8a02e6..000000000 --- a/R-package/man/cb.evaluation.log.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/callbacks.R -\name{cb.evaluation.log} -\alias{cb.evaluation.log} -\title{Callback closure for logging the evaluation history} -\usage{ -cb.evaluation.log() -} -\description{ -Callback closure for logging the evaluation history -} -\details{ -This callback function appends the current iteration evaluation results \code{bst_evaluation} -available in the calling parent frame to the \code{evaluation_log} list in a calling frame. - -The finalizer callback (called with \code{finalize = TURE} in the end) converts -the \code{evaluation_log} list into a final data.table. 
- -The iteration evaluation result \code{bst_evaluation} must be a named numeric vector. - -Note: in the column names of the final data.table, the dash '-' character is replaced with -the underscore '_' in order to make the column names more like regular R identifiers. - -Callback function expects the following values to be set in its calling frame: -\code{evaluation_log}, -\code{bst_evaluation}, -\code{iteration}. -} -\seealso{ -\code{\link{callbacks}} -} diff --git a/R-package/man/cb.print.evaluation.Rd b/R-package/man/cb.print.evaluation.Rd deleted file mode 100644 index 59b9ba65e..000000000 --- a/R-package/man/cb.print.evaluation.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/callbacks.R -\name{cb.print.evaluation} -\alias{cb.print.evaluation} -\title{Callback closure for printing the result of evaluation} -\usage{ -cb.print.evaluation(period = 1, showsd = TRUE) -} -\arguments{ -\item{period}{results would be printed every number of periods} - -\item{showsd}{whether standard deviations should be printed (when available)} -} -\description{ -Callback closure for printing the result of evaluation -} -\details{ -The callback function prints the result of evaluation at every \code{period} iterations. -The initial and the last iteration's evaluations are always printed. - -Callback function expects the following values to be set in its calling frame: -\code{bst_evaluation} (also \code{bst_evaluation_err} when available), -\code{iteration}, -\code{begin_iteration}, -\code{end_iteration}. -} -\seealso{ -\code{\link{callbacks}} -} diff --git a/R-package/man/cb.save.model.Rd b/R-package/man/cb.save.model.Rd deleted file mode 100644 index 7701ad990..000000000 --- a/R-package/man/cb.save.model.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/callbacks.R -\name{cb.save.model} -\alias{cb.save.model} -\title{Callback closure for saving a model file.} -\usage{ -cb.save.model(save_period = 0, save_name = "xgboost.ubj") -} -\arguments{ -\item{save_period}{save the model to disk after every -\code{save_period} iterations; 0 means save the model at the end.} - -\item{save_name}{the name or path for the saved model file. - -\if{html}{\out{
}}\preformatted{ Note that the format of the model being saved is determined by the file - extension specified here (see \link{xgb.save} for details about how it works). - - It can contain a \code{\link[base]{sprintf}} formatting specifier - to include the integer iteration number in the file name. - E.g., with \code{save_name} = 'xgboost_\%04d.ubj', - the file saved at iteration 50 would be named "xgboost_0050.ubj". -}\if{html}{\out{
}}} -} -\description{ -Callback closure for saving a model file. -} -\details{ -This callback function allows to save an xgb-model file, either periodically after each \code{save_period}'s or at the end. - -Callback function expects the following values to be set in its calling frame: -\code{bst}, -\code{iteration}, -\code{begin_iteration}, -\code{end_iteration}. -} -\seealso{ -\link{xgb.save} - -\code{\link{callbacks}} -} diff --git a/R-package/man/xgb.Callback.Rd b/R-package/man/xgb.Callback.Rd new file mode 100644 index 000000000..ed1dd7bed --- /dev/null +++ b/R-package/man/xgb.Callback.Rd @@ -0,0 +1,248 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/callbacks.R +\name{xgb.Callback} +\alias{xgb.Callback} +\title{XGBoost Callback Constructor} +\usage{ +xgb.Callback( + cb_name = "custom_callback", + env = new.env(), + f_before_training = function(env, model, data, watchlist, begin_iteration, + end_iteration) NULL, + f_before_iter = function(env, model, data, watchlist, iteration) NULL, + f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL, + f_after_training = function(env, model, data, watchlist, iteration, final_feval, + prev_cb_res) NULL +) +} +\arguments{ +\item{cb_name}{Name for the callback. + +If the callback produces some non-NULL result (from executing the function passed under +\code{f_after_training}), that result will be added as an R attribute to the resulting booster +(or as a named element in the result of CV), with the attribute name specified here. + +Names of callbacks must be unique - i.e. there cannot be two callbacks with the same name.} + +\item{env}{An environment object that will be passed to the different functions in the callback. +Note that this environment will not be shared with other callbacks.} + +\item{f_before_training}{A function that will be executed before the training has started. + +If passing \code{NULL} for this or for the other function inputs, then no function will be executed. + +If passing a function, it will be called with parameters supplied as non-named arguments +matching the function signatures that are shown in the default value for each function argument.} + +\item{f_before_iter}{A function that will be executed before each boosting round. + +This function can signal whether the training should be finalized or not, by outputting +a value that evaluates to \code{TRUE} - i.e. if the output from the function provided here at +a given round is \code{TRUE}, then training will be stopped before the current iteration happens. + +Return values of \code{NULL} will be interpreted as \code{FALSE}.} + +\item{f_after_iter}{A function that will be executed after each boosting round. + +This function can signal whether the training should be finalized or not, by outputting +a value that evaluates to \code{TRUE} - i.e. if the output from the function provided here at +a given round is \code{TRUE}, then training will be stopped at that round. + +Return values of \code{NULL} will be interpreted as \code{FALSE}.} + +\item{f_after_training}{A function that will be executed after training is finished. 
+
+This function can optionally output something non-NULL, which will become part of the R
+attributes of the booster (assuming one passes \code{keep_extra_attributes=TRUE} to \link{xgb.train})
+under the name supplied for parameter \code{cb_name} in the case of \link{xgb.train}; or a part
+of the named elements in the result of \link{xgb.cv}.}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+Constructor for defining the structure of callback functions that can be executed
+at different stages of model training (before / after training, before / after each boosting
+iteration).
+}
+\details{
+Arguments that will be passed to the supplied functions are as follows:\itemize{
+
+\item env The same environment that is passed under argument \code{env}.
+
+It may be modified by the functions in order to e.g. keep track of what happens
+across iterations or similar.
+
+This environment is only used by the functions supplied to the callback, and will
+not be kept after the model fitting function terminates (see parameter \code{f_after_training}).
+
+\item model The booster object when using \link{xgb.train}, or the folds when using
+\link{xgb.cv}.
+
+For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
+\item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
+\item \code{bst}: The \code{xgb.Booster} object for the fold.
+\item \code{watchlist}: A list with two DMatrices, with names \code{train} and \code{test}
+(\code{test} is the held-out data for the fold).
+\item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
+from which the \code{test} entry in the watchlist was obtained.
+}
+
+This object should \bold{not} be modified in place in ways that conflict with the
+training (e.g. resetting the parameters for a training update in a way that resets
+the number of rounds to zero in order to overwrite rounds).
+
+Note that any R attributes that are assigned to the booster during the callback functions
+will not be kept thereafter, as the booster object variable is not re-assigned during
+training. It is however possible to set C-level attributes of the booster through
+\link{xgb.attr} or \link{xgb.attributes}, which should remain available for the rest
+of the iterations and after the training is done.
+
+For keeping variables across iterations, it's recommended to use \code{env} instead.
+\item data The data to which the model is being fit, as an \code{xgb.DMatrix} object.
+
+Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
+folds can be found in the \code{model} object.
+
+\item watchlist The evaluation watchlist, as passed under argument \code{watchlist} to
+\link{xgb.train}.
+
+For \link{xgb.cv}, this will always be \code{NULL}.
+
+\item begin_iteration Index of the first boosting iteration that will be executed
+(base-1 indexing).
+
+This will typically be '1', but when using training continuation, depending on the
+parameters for updates, boosting rounds will be continued from where the previous
+model ended, in which case this will be larger than 1.
+
+\item end_iteration Index of the last boosting iteration that will be executed
+(base-1 indexing, inclusive of this end).
+
+It should match argument \code{nrounds} passed to \link{xgb.train} or \link{xgb.cv}.
+
+Note that boosting might be interrupted before reaching this last iteration, for
+example by using the early stopping callback \link{xgb.cb.early.stop}.
+
+\item iteration Index of the iteration number that is being executed (first iteration
+will be the same as parameter \code{begin_iteration}, then the next one will add +1, and so on).
+
+\item iter_feval Evaluation metrics for the \code{watchlist} that was supplied, either
+determined by the objective, or by parameter \code{feval}.
+
+For \link{xgb.train}, this will be a named vector with one entry per element in
+\code{watchlist}, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
+example, if \code{watchlist} contains an entry named "tr" and the metric is "rmse",
+this will be a one-element vector with name "tr-rmse".
+
+For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(watchlist), nfolds]},
+where the row names will follow the same naming logic as the one-dimensional vector
+that is passed in \link{xgb.train}.
+
+Note that, internally, the built-in callbacks such as \link{xgb.cb.print.evaluation} summarize
+this table by calculating the row-wise means and standard deviations.
+
+\item final_feval The evaluation results after the last boosting round is executed
+(same format as \code{iter_feval}, and will be the exact same input as passed under
+\code{iter_feval} to the last round that is executed during model fitting).
+
+\item prev_cb_res Result from a previous run of a callback sharing the same name
+(as given by parameter \code{cb_name}) when conducting training continuation, if there
+was any in the booster R attributes.
+
+Sometimes, one might want to append the new results to the previous ones, and this will
+be done automatically by the built-in callbacks such as \link{xgb.cb.evaluation.log},
+which will append the new rows to the previous table.
+
+If no such previous callback result is available (which it never will be when fitting
+a model from scratch instead of updating an existing model), this will be \code{NULL}.
+
+For \link{xgb.cv}, which doesn't support training continuation, this will always be \code{NULL}.
+}
+
+The following names (\code{cb_name} values) are reserved for internal callbacks:\itemize{
+\item print_evaluation
+\item evaluation_log
+\item reset_parameters
+\item early_stop
+\item save_model
+\item cv_predict
+\item gblinear_history
+}
+
+The following names are reserved for other non-callback attributes:\itemize{
+\item names
+\item class
+\item call
+\item params
+\item niter
+\item nfeatures
+\item folds
+}
+
+When using the built-in early stopping callback (\link{xgb.cb.early.stop}), said callback
+will always be executed before the others, as it sets some booster C-level attributes
+that other callbacks might also use. Otherwise, the order of execution will match
+the order in which the callbacks are passed to the model fitting function.
+}
+\examples{
+# Example constructing a custom callback that calculates
+# squared error on the training data, without a watchlist,
+# and outputs the per-iteration results.
+ssq_callback <- xgb.Callback(
+  cb_name = "ssq",
+  f_before_training = function(env, model, data, watchlist,
+                               begin_iteration, end_iteration) {
+    # A vector to keep track of a number at each iteration
+    env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
+  },
+  f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+    # This calculates the sum of squared errors on the training data.
+    # Note that this can be better done by passing a 'watchlist' entry,
+    # but this demonstrates a way in which callbacks can be structured.
+    pred <- predict(model, data)
+    err <- pred - getinfo(data, "label")
+    sq_err <- sum(err^2)
+    env$logs[iteration] <- sq_err
+    cat(
+      sprintf(
+        "Squared error at iteration \%d: \%.2f\n",
+        iteration, sq_err
+      )
+    )
+
+    # A return value of 'TRUE' here would signal to finalize the training
+    return(FALSE)
+  },
+  f_after_training = function(env, model, data, watchlist, iteration,
+                              final_feval, prev_cb_res) {
+    return(env$logs)
+  }
+)
+
+data(mtcars)
+y <- mtcars$mpg
+x <- as.matrix(mtcars[, -1])
+dm <- xgb.DMatrix(x, label = y, nthread = 1)
+model <- xgb.train(
+  data = dm,
+  params = list(objective = "reg:squarederror", nthread = 1),
+  nrounds = 5,
+  callbacks = list(ssq_callback),
+  keep_extra_attributes = TRUE
+)
+
+# Result from 'f_after_training' will be available as an attribute
+attributes(model)$ssq
+}
+\seealso{
+Built-in callbacks:\itemize{
+\item \link{xgb.cb.print.evaluation}
+\item \link{xgb.cb.evaluation.log}
+\item \link{xgb.cb.reset.parameters}
+\item \link{xgb.cb.early.stop}
+\item \link{xgb.cb.save.model}
+\item \link{xgb.cb.cv.predict}
+\item \link{xgb.cb.gblinear.history}
+}
+}
diff --git a/R-package/man/cb.cv.predict.Rd b/R-package/man/xgb.cb.cv.predict.Rd
similarity index 53%
rename from R-package/man/cb.cv.predict.Rd
rename to R-package/man/xgb.cb.cv.predict.Rd
index 4cabac1c9..d2d9a084b 100644
--- a/R-package/man/cb.cv.predict.Rd
+++ b/R-package/man/xgb.cb.cv.predict.Rd
@@ -1,16 +1,27 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/callbacks.R
-\name{cb.cv.predict}
-\alias{cb.cv.predict}
-\title{Callback closure for returning cross-validation based predictions.}
+\name{xgb.cb.cv.predict}
+\alias{xgb.cb.cv.predict}
+\title{Callback for returning cross-validation based predictions.}
 \usage{
-cb.cv.predict(save_models = FALSE)
+xgb.cb.cv.predict(save_models = FALSE, outputmargin = FALSE)
 }
 \arguments{
-\item{save_models}{a flag for whether to save the folds' models.}
+\item{save_models}{A flag for whether to save the folds' models.}
+
+\item{outputmargin}{Whether to save margin predictions (same effect as passing this
+parameter to \link{predict.xgb.Booster}).}
 }
 \value{
-Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
+An \code{xgb.Callback} object, which can be passed to \link{xgb.cv},
+but \bold{not} to \link{xgb.train}.
+}
+\description{
+This callback function saves predictions for all of the test folds,
+and also allows saving the folds' models.
+}
+\details{
+Predictions are saved inside the \code{pred} element, which is either a vector or a matrix,
 depending on the number of prediction outputs per data row. The order of predictions corresponds
 to the order of rows in the original dataset. Note that when a custom \code{folds} list is
 provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a
@@ -19,23 +30,3 @@ meaningful when user-provided folds have overlapping indices as in, e.g., random
 When some of the indices in the training dataset are not included into user-provided \code{folds},
 their prediction value would be \code{NA}.
 }
-\description{
-Callback closure for returning cross-validation based predictions.
-}
-\details{
-This callback function saves predictions for all of the test folds,
-and also allows to save the folds' models.
-
-It is a "finalizer" callback and it uses early stopping information whenever it is available,
-thus it must be run after the early stopping callback if the early stopping is used.
-
-Callback function expects the following values to be set in its calling frame:
-\code{bst_folds},
-\code{basket},
-\code{data},
-\code{end_iteration},
-\code{params},
-}
-\seealso{
-\code{\link{callbacks}}
-}
diff --git a/R-package/man/xgb.cb.early.stop.Rd b/R-package/man/xgb.cb.early.stop.Rd
new file mode 100644
index 000000000..26d2f1aa3
--- /dev/null
+++ b/R-package/man/xgb.cb.early.stop.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.early.stop}
+\alias{xgb.cb.early.stop}
+\title{Callback to activate early stopping}
+\usage{
+xgb.cb.early.stop(
+  stopping_rounds,
+  maximize = FALSE,
+  metric_name = NULL,
+  verbose = TRUE,
+  keep_all_iter = TRUE
+)
+}
+\arguments{
+\item{stopping_rounds}{The number of rounds with no improvement in
+the evaluation metric after which training is stopped.}
+
+\item{maximize}{Whether to maximize the evaluation metric.}
+
+\item{metric_name}{The name of an evaluation column to use as a criterion for early
+stopping. If not set, the last column would be used.
+Let's say the test data in \code{watchlist} was labelled as \code{dtest},
+and one wants to use the AUC in test data for early stopping regardless of where
+it is in the \code{watchlist}, then one of the following would need to be set:
+\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
+All dash '-' characters in metric names are considered equivalent to '_'.}
+
+\item{verbose}{Whether to print the early stopping information.}
+
+\item{keep_all_iter}{Whether to keep all of the boosting rounds that were produced
+in the resulting object. If passing \code{FALSE}, will only keep the boosting rounds
+up to the detected best iteration, discarding the ones that come after.}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+This callback function determines the condition for early stopping.
+
+The following attributes are assigned to the booster's object:
+\itemize{
+\item \code{best_score} the evaluation score at the best iteration
+\item \code{best_iteration} the boosting iteration at which the best score occurred
+(0-based index for interoperability of binary models)
+}
+
+The same values are also stored as R attributes as a result of the callback, plus an additional
+attribute \code{stopped_by_max_rounds} which indicates whether early stopping by the \code{stopping_rounds}
+condition occurred. Note that the \code{best_iteration} that is stored under R attributes will follow
+base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
+through \link{xgb.attr} or \link{xgb.attributes}.
+
+At least one data element is required in the evaluation watchlist for early stopping to work.
+}
diff --git a/R-package/man/xgb.cb.evaluation.log.Rd b/R-package/man/xgb.cb.evaluation.log.Rd
new file mode 100644
index 000000000..1dab64647
--- /dev/null
+++ b/R-package/man/xgb.cb.evaluation.log.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.evaluation.log}
+\alias{xgb.cb.evaluation.log}
+\title{Callback for logging the evaluation history}
+\usage{
+xgb.cb.evaluation.log()
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+Callback for logging the evaluation history
+}
+\details{
+This callback creates a table with per-iteration evaluation metrics (see parameters
+\code{watchlist} and \code{feval} in \link{xgb.train}).
+
+Note: in the column names of the final data.table, the dash '-' character is replaced with
+the underscore '_' in order to make the column names more like regular R identifiers.
+}
+\seealso{
+\link{xgb.cb.print.evaluation}
+}
diff --git a/R-package/man/cb.gblinear.history.Rd b/R-package/man/xgb.cb.gblinear.history.Rd
similarity index 63%
rename from R-package/man/cb.gblinear.history.Rd
rename to R-package/man/xgb.cb.gblinear.history.Rd
index 2a03c14db..0ebaa4685 100644
--- a/R-package/man/cb.gblinear.history.Rd
+++ b/R-package/man/xgb.cb.gblinear.history.Rd
@@ -1,37 +1,48 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/callbacks.R
-\name{cb.gblinear.history}
-\alias{cb.gblinear.history}
-\title{Callback closure for collecting the model coefficients history of a gblinear booster
-during its training.}
+\name{xgb.cb.gblinear.history}
+\alias{xgb.cb.gblinear.history}
+\title{Callback for collecting coefficients history of a gblinear booster}
 \usage{
-cb.gblinear.history(sparse = FALSE)
+xgb.cb.gblinear.history(sparse = FALSE)
 }
 \arguments{
-\item{sparse}{when set to FALSE/TRUE, a dense/sparse matrix is used to store the result.
+\item{sparse}{when set to \code{FALSE}/\code{TRUE}, a dense/sparse matrix is used to store the result.
 Sparse format is useful when one expects only a subset of coefficients to be non-zero,
 when using the "thrifty" feature selector with a fairly small number of top features
 selected per iteration.}
 }
 \value{
-Results are stored in the \code{coefs} element of the closure.
-The \code{\link{xgb.gblinear.history}} convenience function provides an easy
-way to access it.
-With \code{xgb.train}, it is either a dense of a sparse matrix.
-While with \code{xgb.cv}, it is a list (an element per each fold) of such
-matrices.
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
 }
 \description{
-Callback closure for collecting the model coefficients history of a gblinear booster
-during its training.
+Callback for collecting coefficients history of a gblinear booster
 }
 \details{
 To keep things fast and simple, gblinear booster does not internally store the history of linear
 model coefficients at each boosting iteration. This callback provides a workaround for storing
 the coefficients' path, by extracting them after each training iteration.
 
-Callback function expects the following values to be set in its calling frame:
-\code{bst} (or \code{bst_folds}).
+This callback will construct a matrix where rows are boosting iterations and columns are
+feature coefficients (same order as when calling \link{coef.xgb.Booster}, with the intercept
+corresponding to the first column).
+
+When there is more than one coefficient per feature (e.g.
multi-class classification),
+the result will be reshaped into a vector where coefficients are arranged first by features and
+then by class (e.g. first 1 through N coefficients will be for the first class, then
+coefficients N+1 through 2N for the second class, and so on).
+
+If the result has only one coefficient per feature in the data, then the resulting matrix
+will have column names matching the feature names, otherwise (when there's more than
+one coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'
+(so e.g. column 'c1' for class '0' will be named 'c1:0').
+
+With \code{xgb.train}, the output is either a dense or a sparse matrix.
+With \code{xgb.cv}, it is a list (one element per fold) of such
+matrices.
+
+The \link{xgb.gblinear.history} function provides an easy way to retrieve the
+outputs from this callback.
 }
 \examples{
 #### Binary classification:
@@ -52,7 +63,7 @@ param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "a
 # rate does not break the convergence, but allows us to illustrate the typical pattern of
 # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 1.,
-                 callbacks = list(cb.gblinear.history()))
+                 callbacks = list(xgb.cb.gblinear.history()))
 # Extract the coefficients' path and plot them vs boosting iteration number:
 coef_path <- xgb.gblinear.history(bst)
 matplot(coef_path, type = 'l')
@@ -61,7 +72,7 @@ matplot(coef_path, type = 'l')
 # Will try the classical componentwise boosting which selects a single best feature per round:
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8,
                  updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
-                 callbacks = list(cb.gblinear.history()))
+                 callbacks = list(xgb.cb.gblinear.history()))
 matplot(xgb.gblinear.history(bst), type = 'l')
 # Componentwise boosting is known to have similar effect to Lasso regularization.
 # Try experimenting with various values of top_k, eta, nrounds,
@@ -69,7 +80,7 @@ matplot(xgb.gblinear.history(bst), type = 'l')
 
 # For xgb.cv:
 bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
-              callbacks = list(cb.gblinear.history()))
+              callbacks = list(xgb.cb.gblinear.history()))
 # coefficients in the CV fold #3
 matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
 
@@ -82,7 +93,7 @@ param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
 # For the default linear updater 'shotgun' it sometimes is helpful
 # to use smaller eta to reduce instability
 bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
-                 callbacks = list(cb.gblinear.history()))
+                 callbacks = list(xgb.cb.gblinear.history()))
 # Will plot the coefficient paths separately for each class:
 matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l')
 matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l')
@@ -90,11 +101,11 @@ matplot(xgb.gblinear.history(bst, class_index = 2), type = 'l')
 
 # CV:
 bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5,
-              callbacks = list(cb.gblinear.history(FALSE)))
+              callbacks = list(xgb.cb.gblinear.history(FALSE)))
 # 1st fold of 1st class
 matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l')
 }
 \seealso{
-\code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
+\link{xgb.gblinear.history}, \link{coef.xgb.Booster}.
}
diff --git a/R-package/man/xgb.cb.print.evaluation.Rd b/R-package/man/xgb.cb.print.evaluation.Rd
new file mode 100644
index 000000000..c4f2e6991
--- /dev/null
+++ b/R-package/man/xgb.cb.print.evaluation.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.print.evaluation}
+\alias{xgb.cb.print.evaluation}
+\title{Callback for printing the result of evaluation}
+\usage{
+xgb.cb.print.evaluation(period = 1, showsd = TRUE)
+}
+\arguments{
+\item{period}{results would be printed every \code{period} iterations}
+
+\item{showsd}{whether standard deviations should be printed (when available)}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+The callback function prints the result of evaluation at every \code{period} iterations.
+The initial and the last iteration's evaluations are always printed.
+
+Does not leave any attribute in the booster (see \link{xgb.cb.evaluation.log} for that).
+}
+\seealso{
+\link{xgb.Callback}
+}
diff --git a/R-package/man/cb.reset.parameters.Rd b/R-package/man/xgb.cb.reset.parameters.Rd
similarity index 57%
rename from R-package/man/cb.reset.parameters.Rd
rename to R-package/man/xgb.cb.reset.parameters.Rd
index ee0a5d1bd..c7e863817 100644
--- a/R-package/man/cb.reset.parameters.Rd
+++ b/R-package/man/xgb.cb.reset.parameters.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/callbacks.R
-\name{cb.reset.parameters}
-\alias{cb.reset.parameters}
-\title{Callback closure for resetting the booster's parameters at each iteration.}
+\name{xgb.cb.reset.parameters}
+\alias{xgb.cb.reset.parameters}
+\title{Callback for resetting the booster's parameters at each iteration.}
 \usage{
-cb.reset.parameters(new_params)
+xgb.cb.reset.parameters(new_params)
 }
 \arguments{
 \item{new_params}{a list where each element corresponds to a parameter that needs to be reset.
@@ -14,23 +14,16 @@ or a function of two parameters \code{learning_rates(iteration, nrounds)} which
 returns a new parameter value by using the current iteration number and the total number of
 boosting rounds.}
 }
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
 \description{
-Callback closure for resetting the booster's parameters at each iteration.
+Callback for resetting the booster's parameters at each iteration.
 }
 \details{
-This is a "pre-iteration" callback function used to reset booster's parameters
-at the beginning of each iteration.
-
 Note that when training is resumed from some previous model, and a function is used to
 reset a parameter value, the \code{nrounds} argument in this function would be the
 number of boosting rounds in the current training.
 
-Callback function expects the following values to be set in its calling frame:
-\code{bst} or \code{bst_folds},
-\code{iteration},
-\code{begin_iteration},
-\code{end_iteration}.
-}
-\seealso{
-\code{\link{callbacks}}
+Does not leave any attribute in the booster.
}
diff --git a/R-package/man/xgb.cb.save.model.Rd b/R-package/man/xgb.cb.save.model.Rd
new file mode 100644
index 000000000..8ddba2f1a
--- /dev/null
+++ b/R-package/man/xgb.cb.save.model.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.save.model}
+\alias{xgb.cb.save.model}
+\title{Callback for saving a model file.}
+\usage{
+xgb.cb.save.model(save_period = 0, save_name = "xgboost.ubj")
+}
+\arguments{
+\item{save_period}{Save the model to disk after every
+\code{save_period} iterations; 0 means save the model at the end.}
+
+\item{save_name}{The name or path for the saved model file.
+It can contain a \code{\link[base]{sprintf}} formatting specifier
+to include the integer iteration number in the file name.
+E.g., with \code{save_name} = 'xgboost_\%04d.model',
+the file saved at iteration 50 would be named "xgboost_0050.model".}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train},
+but \bold{not} to \link{xgb.cv}.
+}
+\description{
+This callback function allows saving an xgb-model file, either periodically
+after every \code{save_period} iterations or at the end.
+
+Does not leave any attribute in the booster.
+}
diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd
index 9f6103a52..778b4540a 100644
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -59,7 +59,7 @@ that NA values should be considered as 'missing' by the algorithm.
 Sometimes, 0 or other extreme value might be used to represent missing values.}
 
 \item{prediction}{A logical value indicating whether to return the test fold predictions
-from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.}
+from each CV model. This parameter engages the \code{\link{xgb.cb.cv.predict}} callback.}
 
 \item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
 
@@ -98,20 +98,20 @@ the \code{nfold} and \code{stratified} parameters are ignored.}
 
 \item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}.
 Default is 1 which means all messages are printed. This parameter is passed to the
-\code{\link{cb.print.evaluation}} callback.}
+\code{\link{xgb.cb.print.evaluation}} callback.}
 
 \item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered.
 If set to an integer \code{k},
 training with a validation set will stop if the performance
 doesn't improve for \code{k} rounds.
-Setting this parameter engages the \code{\link{cb.early.stop}} callback.}
+Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.}
 
 \item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set,
 then this parameter must be set as well.
 When it is \code{TRUE}, it means the larger the evaluation score the better.
-This parameter is passed to the \code{\link{cb.early.stop}} callback.}
+This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.}
 
 \item{callbacks}{a list of callback functions to perform various tasks during boosting.
-See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
 parameters' values. User can provide either existing or their own callback methods in order
 to customize the training process.
 
@@ -122,24 +122,24 @@ An object of class \code{xgb.cv.synchronous} with the following elements:
 \itemize{
 \item \code{call} a function call.
 \item \code{params} parameters that were passed to the xgboost library. Note that it does not
-capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
-\item \code{callbacks} callback functions that were either automatically assigned or
-explicitly passed.
+capture parameters changed by the \code{\link{xgb.cb.reset.parameters}} callback.
 \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
 first column corresponding to iteration number and the rest corresponding to the
 CV-based evaluation means and standard deviations for the training and test CV-sets.
-It is created by the \code{\link{cb.evaluation.log}} callback.
+It is created by the \code{\link{xgb.cb.evaluation.log}} callback.
 \item \code{niter} number of boosting iterations.
 \item \code{nfeatures} number of features in training data.
 \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
 parameter or randomly generated.
 \item \code{best_iteration} iteration number with the best evaluation metric value
 (only available with early stopping).
-\item \code{pred} CV prediction values available when \code{prediction} is set.
-It is either vector or matrix (see \code{\link{cb.cv.predict}}).
-\item \code{models} a list of the CV folds' models. It is only available with the explicit
-setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
 }
+
+Plus other potential elements that are the result of callbacks, such as a list \code{cv_predict} with
+a sub-element \code{pred} when passing \code{prediction = TRUE}, which is added by the \link{xgb.cb.cv.predict}
+callback (note that one can also pass it manually under \code{callbacks} with different settings,
+such as also saving the models created during cross-validation); or a list \code{early_stop} which
+will contain elements such as \code{best_iteration} when using the early stopping callback (\link{xgb.cb.early.stop}).
 }
 \description{
 The cross validation function of xgboost
diff --git a/R-package/man/xgb.gblinear.history.Rd b/R-package/man/xgb.gblinear.history.Rd
index 103be16f1..25aef7163 100644
--- a/R-package/man/xgb.gblinear.history.Rd
+++ b/R-package/man/xgb.gblinear.history.Rd
@@ -8,7 +8,7 @@ xgb.gblinear.history(model, class_index = NULL)
 }
 \arguments{
 \item{model}{either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
-using the \code{cb.gblinear.history()} callback, but \bold{not} a booster
+using the \link{xgb.cb.gblinear.history} callback, but \bold{not} a booster
 loaded from \link{xgb.load} or \link{xgb.load.raw}.}
 
 \item{class_index}{zero-based class index to extract the coefficients for only that
@@ -16,23 +16,31 @@ specific class in a multinomial multiclass model. When it is NULL, all the
 coefficients are returned. Has no effect in non-multiclass models.}
 }
 \value{
-For an \code{xgb.train} result, a matrix (either dense or sparse) with the columns
-corresponding to iteration's coefficients (in the order as \code{xgb.dump()} would
-return) and the rows corresponding to boosting iterations.
+For an \link{xgb.train} result, a matrix (either dense or sparse) with the columns
+corresponding to the coefficients and the rows corresponding to boosting iterations.
 
-For an \code{xgb.cv} result, a list of such matrices is returned with the elements
+For an \link{xgb.cv} result, a list of such matrices is returned with the elements
 corresponding to CV folds.
+
+When there is more than one coefficient per feature (e.g.
multi-class classification) +and \code{class_index} is not provided, +the result will be reshaped into a vector where coefficients are arranged first by features and +then by class (e.g. first 1 through N coefficients will be for the first class, then +coefficients N+1 through 2N for the second class, and so on). } \description{ A helper function to extract the matrix of linear coefficients' history -from a gblinear model created while using the \code{cb.gblinear.history()} -callback. +from a gblinear model created while using the \link{xgb.cb.gblinear.history} +callback (which must be added manually as by default it's not used). } \details{ Note that this is an R-specific function that relies on R attributes that are not saved when using xgboost's own serialization functions like \link{xgb.load} or \link{xgb.load.raw}. -In order for a serialized model to be accepted by tgis function, one must use R +In order for a serialized model to be accepted by this function, one must use R serializers such as \link{saveRDS}. } +\seealso{ +\link{xgb.cb.gblinear.history}, \link{coef.xgb.Booster}. +} diff --git a/R-package/man/xgb.load.Rd b/R-package/man/xgb.load.Rd index 1fbe0055e..e18a900e3 100644 --- a/R-package/man/xgb.load.Rd +++ b/R-package/man/xgb.load.Rd @@ -17,7 +17,7 @@ Load xgboost model from the binary model file. } \details{ The input file is expected to contain a model saved in an xgboost model format -using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some +using either \code{\link{xgb.save}} or \code{\link{xgb.cb.save.model}} in R, or using some appropriate methods from other xgboost interfaces. E.g., a model trained in Python and saved from there in xgboost format, could be loaded from R. diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 21c5fe7ee..45c78ae13 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -162,7 +162,7 @@ List is provided in detail section.} Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each of these datasets during each boosting iteration, and stored in the end as a field named \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or -\code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously +\code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously printed out during the training. E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track the performance of each round's model on mat1 and mat2.} @@ -177,24 +177,24 @@ prediction and dtrain.} \item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance. If 2, some additional information will be printed out. Note that setting \code{verbose > 0} automatically engages the -\code{cb.print.evaluation(period=1)} callback function.} +\code{xgb.cb.print.evaluation(period=1)} callback function.} \item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}. Default is 1 which means all messages are printed. This parameter is passed to the -\code{\link{cb.print.evaluation}} callback.} +\code{\link{xgb.cb.print.evaluation}} callback.} \item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered. If set to an integer \code{k}, training with a validation set will stop if the performance doesn't improve for \code{k} rounds. 
-Setting this parameter engages the \code{\link{cb.early.stop}} callback.}
+Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.}
 
 \item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set,
 then this parameter must be set as well.
 When it is \code{TRUE}, it means the larger the evaluation score the better.
-This parameter is passed to the \code{\link{cb.early.stop}} callback.}
+This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.}
 
 \item{save_period}{when it is non-NULL, model is saved to disk after every \code{save_period} rounds,
-0 means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.}
+0 means save at the end. The saving is handled by the \code{\link{xgb.cb.save.model}} callback.}
 
 \item{save_name}{the name or path for periodically saved model file.}
 
@@ -203,12 +203,13 @@ Could be either an object of class \code{xgb.Booster}, or its raw data, or the n
 file with a previously saved model.}
 
 \item{callbacks}{a list of callback functions to perform various tasks during boosting.
-See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
 parameters' values. User can provide either existing or their own callback methods in order
 to customize the training process.
 
-\if{html}{\out{
}}\preformatted{ Note that some callbacks might try to set an evaluation log - be aware that these evaluation logs - are kept as R attributes, and thus do not get saved when using non-R serializaters like +\if{html}{\out{
}}\preformatted{ Note that some callbacks might try to leave attributes in the resulting model object,
+    such as an evaluation log (a `data.table` object) - be aware that these objects are kept
+    as R attributes, and thus do not get saved when using XGBoost's own serializers like
+    \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
}\if{html}{\out{
}}} @@ -269,18 +270,19 @@ Different threshold (e.g., 0.) could be specified as "error@0." The following callbacks are automatically created when certain parameters are set: \itemize{ -\item \code{cb.print.evaluation} is turned on when \code{verbose > 0}; +\item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0}; and the \code{print_every_n} parameter is passed to it. -\item \code{cb.evaluation.log} is on when \code{watchlist} is present. -\item \code{cb.early.stop}: when \code{early_stopping_rounds} is set. -\item \code{cb.save.model}: when \code{save_period > 0} is set. +\item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present. +\item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set. +\item \code{xgb.cb.save.model}: when \code{save_period > 0} is set. } Note that objects of type \code{xgb.Booster} as returned by this function behave a bit differently from typical R objects (it's an 'altrep' list class), and it makes a separation between internal booster attributes (restricted to jsonifyable data), accessed through \link{xgb.attr} and shared between interfaces through serialization functions like \link{xgb.save}; and -R-specific attributes, accessed through \link{attributes} and \link{attr}, which are otherwise +R-specific attributes (typically the result from a callback), accessed through \link{attributes} +and \link{attr}, which are otherwise only used in the R interface, only kept when using R's serializers like \link{saveRDS}, and not anyhow used by functions like \link{predict.xgb.Booster}. @@ -348,7 +350,7 @@ param <- list(max_depth = 2, eta = 1, nthread = nthread, objective = "binary:logistic", eval_metric = "auc") my_etas <- list(eta = c(0.5, 0.1)) bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, - callbacks = list(cb.reset.parameters(my_etas))) + callbacks = list(xgb.cb.reset.parameters(my_etas))) ## Early stopping: bst <- xgb.train(param, dtrain, nrounds = 25, watchlist, @@ -366,7 +368,7 @@ Tianqi Chen and Carlos Guestrin, "XGBoost: A Scalable Tree Boosting System", 22nd SIGKDD Conference on Knowledge Discovery and Data Mining, 2016, \url{https://arxiv.org/abs/1603.02754} } \seealso{ -\code{\link{callbacks}}, +\code{\link{xgb.Callback}}, \code{\link{predict.xgb.Booster}}, \code{\link{xgb.cv}} } diff --git a/R-package/src/init.c b/R-package/src/init.c index f2635742e..c869871c6 100644 --- a/R-package/src/init.c +++ b/R-package/src/init.c @@ -76,6 +76,7 @@ extern SEXP XGBSetGlobalConfig_R(SEXP); extern SEXP XGBGetGlobalConfig_R(void); extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP); extern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP); +extern SEXP XGBoosterSliceAndReplace_R(SEXP, SEXP, SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { {"XGDuplicate_R", (DL_FUNC) &XGDuplicate_R, 1}, @@ -138,6 +139,7 @@ static const R_CallMethodDef CallEntries[] = { {"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0}, {"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2}, {"XGBoosterSlice_R", (DL_FUNC) &XGBoosterSlice_R, 4}, + {"XGBoosterSliceAndReplace_R", (DL_FUNC) &XGBoosterSliceAndReplace_R, 4}, {NULL, NULL, 0} }; diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index 5baf8d412..2228932bd 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -1674,3 +1674,18 @@ XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEX Rf_unprotect(1); return out; } + +XGB_DLL SEXP XGBoosterSliceAndReplace_R(SEXP handle, SEXP begin_layer, SEXP 
end_layer, SEXP step) {
+  R_API_BEGIN();
+  BoosterHandle old_handle = R_ExternalPtrAddr(handle);
+  BoosterHandle new_handle = nullptr;
+  CHECK_CALL(XGBoosterSlice(old_handle,
+                            Rf_asInteger(begin_layer),
+                            Rf_asInteger(end_layer),
+                            Rf_asInteger(step),
+                            &new_handle));
+  R_SetExternalPtrAddr(handle, new_handle);
+  CHECK_CALL(XGBoosterFree(old_handle));
+  R_API_END();
+  return R_NilValue;
+}
diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h
index 70fd885e7..cea50c146 100644
--- a/R-package/src/xgboost_R.h
+++ b/R-package/src/xgboost_R.h
@@ -535,4 +535,14 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
 */
 XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
 
+/*!
+ * \brief Slice a fitted booster model (by rounds), and replace its handle with the result
+ * \param handle handle to the fitted booster
+ * \param begin_layer start of the slice
+ * \param end_layer end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round
+ * \param step step size of the slice
+ * \return NULL
+ */
+XGB_DLL SEXP XGBoosterSliceAndReplace_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
+
 #endif  // XGBOOST_WRAPPER_R_H_  // NOLINT(*)
diff --git a/R-package/tests/testthat.R b/R-package/tests/testthat.R
index 7cf711292..bad6c1df3 100644
--- a/R-package/tests/testthat.R
+++ b/R-package/tests/testthat.R
@@ -1,5 +1,6 @@
 library(testthat)
 library(xgboost)
+library(Matrix)
 
 test_check("xgboost", reporter = ProgressReporter)
 RhpcBLASctl::omp_set_num_threads(1)
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 5438c8bb2..ee0f4c7ba 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -348,7 +348,6 @@ test_that("xgb.cv works", {
   expect_false(is.null(cv$folds) && is.list(cv$folds))
   expect_length(cv$folds, 5)
   expect_false(is.null(cv$params) && is.list(cv$params))
-  expect_false(is.null(cv$callbacks))
   expect_false(is.null(cv$call))
 })
 
diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R
index c60d0c246..a0b4910cc 100644
--- a/R-package/tests/testthat/test_callbacks.R
+++ b/R-package/tests/testthat/test_callbacks.R
@@ -28,79 +28,125 @@ param <- list(objective = "binary:logistic", eval_metric = "error",
               max_depth = 2, nthread = n_threads)
 
-test_that("cb.print.evaluation works as expected", {
+test_that("xgb.cb.print.evaluation works as expected for xgb.train", {
+  logs1 <- capture.output({
+    model <- xgb.train(
+      data = dtrain,
+      params = list(
+        objective = "binary:logistic",
+        eval_metric = "auc",
+        max_depth = 2,
+        nthread = n_threads
+      ),
+      nrounds = 10,
+      watchlist = list(train = dtrain, test = dtest),
+      callbacks = list(xgb.cb.print.evaluation(period = 1))
+    )
+  })
+  expect_equal(length(logs1), 10)
+  expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+\ttest-auc:0\\.\\d+\\s*$", logs1)))
+  lapply(seq(1, 10), function(x) expect_true(grepl(paste0("^\\[", x), logs1[x])))
 
-  bst_evaluation <- c('train-auc' = 0.9, 'test-auc' = 0.8)
-  bst_evaluation_err <- NULL
-  begin_iteration <- 1
-  end_iteration <- 7
-
-  f0 <- cb.print.evaluation(period = 0)
-  f1 <- cb.print.evaluation(period = 1)
-  f5 <- cb.print.evaluation(period = 5)
-
-  expect_false(is.null(attr(f1, 'call')))
-  expect_equal(attr(f1, 'name'), 'cb.print.evaluation')
-
-  iteration <- 1
-  expect_silent(f0())
-  expect_output(f1(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
-  expect_output(f5(),
"\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000") - expect_null(f1()) - - iteration <- 2 - expect_output(f1(), "\\[2\\]\ttrain-auc:0.900000\ttest-auc:0.800000") - expect_silent(f5()) - - iteration <- 7 - expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000") - expect_output(f5(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000") - - bst_evaluation_err <- c('train-auc' = 0.1, 'test-auc' = 0.2) - expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000±0.100000\ttest-auc:0.800000±0.200000") + logs2 <- capture.output({ + model <- xgb.train( + data = dtrain, + params = list( + objective = "binary:logistic", + eval_metric = "auc", + max_depth = 2, + nthread = n_threads + ), + nrounds = 10, + watchlist = list(train = dtrain, test = dtest), + callbacks = list(xgb.cb.print.evaluation(period = 2)) + ) + }) + expect_equal(length(logs2), 6) + expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+\ttest-auc:0\\.\\d+\\s*$", logs2))) + seq_matches <- c(seq(1, 10, 2), 10) + lapply(seq_along(seq_matches), function(x) expect_true(grepl(paste0("^\\[", seq_matches[x]), logs2[x]))) }) -test_that("cb.evaluation.log works as expected", { +test_that("xgb.cb.print.evaluation works as expected for xgb.cv", { + logs1 <- capture.output({ + model <- xgb.cv( + data = dtrain, + params = list( + objective = "binary:logistic", + eval_metric = "auc", + max_depth = 2, + nthread = n_threads + ), + nrounds = 10, + nfold = 3, + callbacks = list(xgb.cb.print.evaluation(period = 1, showsd = TRUE)) + ) + }) + expect_equal(length(logs1), 10) + expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+±0\\.\\d+\ttest-auc:0\\.\\d+±0\\.\\d+\\s*$", logs1))) + lapply(seq(1, 10), function(x) expect_true(grepl(paste0("^\\[", x), logs1[x]))) - bst_evaluation <- c('train-auc' = 0.9, 'test-auc' = 0.8) - bst_evaluation_err <- NULL + logs2 <- capture.output({ + model <- xgb.cv( + data = dtrain, + params = list( + objective = "binary:logistic", + eval_metric = "auc", + max_depth = 2, + nthread = n_threads + ), + nrounds = 10, + nfold = 3, + callbacks = list(xgb.cb.print.evaluation(period = 2, showsd = TRUE)) + ) + }) + expect_equal(length(logs2), 6) + expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+±0\\.\\d+\ttest-auc:0\\.\\d+±0\\.\\d+\\s*$", logs2))) + seq_matches <- c(seq(1, 10, 2), 10) + lapply(seq_along(seq_matches), function(x) expect_true(grepl(paste0("^\\[", seq_matches[x]), logs2[x]))) +}) - evaluation_log <- list() - f <- cb.evaluation.log() +test_that("xgb.cb.evaluation.log works as expected for xgb.train", { + model <- xgb.train( + data = dtrain, + params = list( + objective = "binary:logistic", + eval_metric = "auc", + max_depth = 2, + nthread = n_threads + ), + nrounds = 10, + verbose = FALSE, + watchlist = list(train = dtrain, test = dtest), + callbacks = list(xgb.cb.evaluation.log()) + ) + logs <- attributes(model)$evaluation_log - expect_false(is.null(attr(f, 'call'))) - expect_equal(attr(f, 'name'), 'cb.evaluation.log') + expect_equal(nrow(logs), 10) + expect_equal(colnames(logs), c("iter", "train_auc", "test_auc")) +}) - iteration <- 1 - expect_silent(f()) - expect_equal(evaluation_log, - list(c(iter = 1, bst_evaluation))) - iteration <- 2 - expect_silent(f()) - expect_equal(evaluation_log, - list(c(iter = 1, bst_evaluation), c(iter = 2, bst_evaluation))) - expect_silent(f(finalize = TRUE)) - expect_equal(evaluation_log, - data.table::data.table(iter = 1:2, train_auc = c(0.9, 0.9), test_auc = c(0.8, 0.8))) +test_that("xgb.cb.evaluation.log works as expected for xgb.cv", { + model <- xgb.cv( + data = 
dtrain, + params = list( + objective = "binary:logistic", + eval_metric = "auc", + max_depth = 2, + nthread = n_threads + ), + nrounds = 10, + verbose = FALSE, + nfold = 3, + callbacks = list(xgb.cb.evaluation.log()) + ) + logs <- model$evaluation_log - bst_evaluation_err <- c('train-auc' = 0.1, 'test-auc' = 0.2) - evaluation_log <- list() - f <- cb.evaluation.log() - - iteration <- 1 - expect_silent(f()) - expect_equal(evaluation_log, - list(c(iter = 1, c(bst_evaluation, bst_evaluation_err)))) - iteration <- 2 - expect_silent(f()) - expect_equal(evaluation_log, - list(c(iter = 1, c(bst_evaluation, bst_evaluation_err)), - c(iter = 2, c(bst_evaluation, bst_evaluation_err)))) - expect_silent(f(finalize = TRUE)) - expect_equal(evaluation_log, - data.table::data.table(iter = 1:2, - train_auc_mean = c(0.9, 0.9), train_auc_std = c(0.1, 0.1), - test_auc_mean = c(0.8, 0.8), test_auc_std = c(0.2, 0.2))) + expect_equal(nrow(logs), 10) + expect_equal( + colnames(logs), + c("iter", "train_auc_mean", "train_auc_std", "test_auc_mean", "test_auc_std") + ) }) @@ -116,7 +162,7 @@ test_that("can store evaluation_log without printing", { expect_lt(attributes(bst)$evaluation_log[, min(train_error)], 0.2) }) -test_that("cb.reset.parameters works as expected", { +test_that("xgb.cb.reset.parameters works as expected", { # fixed eta set.seed(111) @@ -128,7 +174,7 @@ test_that("cb.reset.parameters works as expected", { set.seed(111) my_par <- list(eta = c(0.9, 0.9)) bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, - callbacks = list(cb.reset.parameters(my_par))) + callbacks = list(xgb.cb.reset.parameters(my_par))) expect_false(is.null(attributes(bst1)$evaluation_log$train_error)) expect_equal(attributes(bst0)$evaluation_log$train_error, attributes(bst1)$evaluation_log$train_error) @@ -137,7 +183,7 @@ test_that("cb.reset.parameters works as expected", { set.seed(111) my_par <- list(eta = function(itr, itr_end) 0.9) bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, - callbacks = list(cb.reset.parameters(my_par))) + callbacks = list(xgb.cb.reset.parameters(my_par))) expect_false(is.null(attributes(bst2)$evaluation_log$train_error)) expect_equal(attributes(bst0)$evaluation_log$train_error, attributes(bst2)$evaluation_log$train_error) @@ -146,7 +192,7 @@ test_that("cb.reset.parameters works as expected", { set.seed(111) my_par <- list(eta = c(0.6, 0.5)) bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, - callbacks = list(cb.reset.parameters(my_par))) + callbacks = list(xgb.cb.reset.parameters(my_par))) expect_false(is.null(attributes(bst3)$evaluation_log$train_error)) expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error)) @@ -154,25 +200,25 @@ test_that("cb.reset.parameters works as expected", { my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8)) expect_error( bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, - callbacks = list(cb.reset.parameters(my_par))) + callbacks = list(xgb.cb.reset.parameters(my_par))) , NA) # NA = no error # CV works as well expect_error( bst4 <- xgb.cv(param, dtrain, nfold = 2, nrounds = 2, verbose = 0, - callbacks = list(cb.reset.parameters(my_par))) + callbacks = list(xgb.cb.reset.parameters(my_par))) , NA) # NA = no error # expect no learning with 0 learning rate my_par <- list(eta = c(0., 0.)) bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, - callbacks = list(cb.reset.parameters(my_par))) + callbacks = 
list(xgb.cb.reset.parameters(my_par))) expect_false(is.null(attributes(bstX)$evaluation_log$train_error)) er <- unique(attributes(bstX)$evaluation_log$train_error) expect_length(er, 1) expect_gt(er, 0.4) }) -test_that("cb.save.model works as expected", { +test_that("xgb.cb.save.model works as expected", { files <- c('xgboost_01.json', 'xgboost_02.json', 'xgboost.json') files <- unname(sapply(files, function(f) file.path(tempdir(), f))) for (f in files) if (file.exists(f)) file.remove(f) @@ -238,8 +284,8 @@ test_that("early stopping using a specific metric works", { expect_output( bst <- xgb.train(param[-2], dtrain, nrounds = 20, watchlist, eta = 0.6, eval_metric = "logloss", eval_metric = "auc", - callbacks = list(cb.early.stop(stopping_rounds = 3, maximize = FALSE, - metric_name = 'test_logloss'))) + callbacks = list(xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE, + metric_name = 'test_logloss'))) , "Stopping. Best iteration") expect_false(is.null(xgb.attr(bst, "best_iteration"))) expect_lt(xgb.attr(bst, "best_iteration"), 19) @@ -281,10 +327,10 @@ test_that("early stopping xgb.cv works", { cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.3, nrounds = 20, early_stopping_rounds = 3, maximize = FALSE) , "Stopping. Best iteration") - expect_false(is.null(cv$best_iteration)) - expect_lt(cv$best_iteration, 19) + expect_false(is.null(cv$early_stop$best_iteration)) + expect_lt(cv$early_stop$best_iteration, 19) # the best error is min error: - expect_true(cv$evaluation_log[, test_error_mean[cv$best_iteration] == min(test_error_mean)]) + expect_true(cv$evaluation_log[, test_error_mean[cv$early_stop$best_iteration] == min(test_error_mean)]) }) test_that("prediction in xgb.cv works", { @@ -292,19 +338,19 @@ test_that("prediction in xgb.cv works", { nrounds <- 4 cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0) expect_false(is.null(cv$evaluation_log)) - expect_false(is.null(cv$pred)) - expect_length(cv$pred, nrow(train$data)) - err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f])))) + expect_false(is.null(cv$cv_predict$pred)) + expect_length(cv$cv_predict$pred, nrow(train$data)) + err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$cv_predict$pred[f])))) err_log <- cv$evaluation_log[nrounds, test_error_mean] expect_equal(err_pred, err_log, tolerance = 1e-6) # save CV models set.seed(11) cvx <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0, - callbacks = list(cb.cv.predict(save_models = TRUE))) + callbacks = list(xgb.cb.cv.predict(save_models = TRUE))) expect_equal(cv$evaluation_log, cvx$evaluation_log) - expect_length(cvx$models, 5) - expect_true(all(sapply(cvx$models, class) == 'xgb.Booster')) + expect_length(cvx$cv_predict$models, 5) + expect_true(all(sapply(cvx$cv_predict$models, class) == 'xgb.Booster')) }) test_that("prediction in xgb.cv works for gblinear too", { @@ -312,8 +358,8 @@ test_that("prediction in xgb.cv works for gblinear too", { p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads) cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0) expect_false(is.null(cv$evaluation_log)) - expect_false(is.null(cv$pred)) - expect_length(cv$pred, nrow(train$data)) + expect_false(is.null(cv$cv_predict$pred)) + expect_length(cv$cv_predict$pred, nrow(train$data)) }) test_that("prediction in early-stopping xgb.cv works", { @@ -324,14 +370,14 @@ test_that("prediction in early-stopping xgb.cv 
works", { prediction = TRUE, base_score = 0.5) , "Stopping. Best iteration") - expect_false(is.null(cv$best_iteration)) - expect_lt(cv$best_iteration, 19) + expect_false(is.null(cv$early_stop$best_iteration)) + expect_lt(cv$early_stop$best_iteration, 19) expect_false(is.null(cv$evaluation_log)) - expect_false(is.null(cv$pred)) - expect_length(cv$pred, nrow(train$data)) + expect_false(is.null(cv$cv_predict$pred)) + expect_length(cv$cv_predict$pred, nrow(train$data)) - err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f])))) - err_log <- cv$evaluation_log[cv$best_iteration, test_error_mean] + err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$cv_predict$pred[f])))) + err_log <- cv$evaluation_log[cv$early_stop$best_iteration, test_error_mean] expect_equal(err_pred, err_log, tolerance = 1e-6) err_log_last <- cv$evaluation_log[cv$niter, test_error_mean] expect_gt(abs(err_pred - err_log_last), 1e-4) @@ -346,9 +392,9 @@ test_that("prediction in xgb.cv for softprob works", { subsample = 0.8, gamma = 2, verbose = 0, prediction = TRUE, objective = "multi:softprob", num_class = 3) , NA) - expect_false(is.null(cv$pred)) - expect_equal(dim(cv$pred), c(nrow(iris), 3)) - expect_lt(diff(range(rowSums(cv$pred))), 1e-6) + expect_false(is.null(cv$cv_predict$pred)) + expect_equal(dim(cv$cv_predict$pred), c(nrow(iris), 3)) + expect_lt(diff(range(rowSums(cv$cv_predict$pred))), 1e-6) }) test_that("prediction in xgb.cv works for multi-quantile", { @@ -368,7 +414,7 @@ test_that("prediction in xgb.cv works for multi-quantile", { prediction = TRUE, verbose = 0 ) - expect_equal(dim(cv$pred), c(nrow(x), 5)) + expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 5)) }) test_that("prediction in xgb.cv works for multi-output", { @@ -389,5 +435,46 @@ test_that("prediction in xgb.cv works for multi-output", { prediction = TRUE, verbose = 0 ) - expect_equal(dim(cv$pred), c(nrow(x), 2)) + expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 2)) +}) + +test_that("prediction in xgb.cv works for multi-quantile", { + data(mtcars) + y <- mtcars$mpg + x <- as.matrix(mtcars[, -1]) + dm <- xgb.DMatrix(x, label = y, nthread = 1) + cv <- xgb.cv( + data = dm, + params = list( + objective = "reg:quantileerror", + quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9), + nthread = 1 + ), + nrounds = 5, + nfold = 3, + prediction = TRUE, + verbose = 0 + ) + expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 5)) +}) + +test_that("prediction in xgb.cv works for multi-output", { + data(mtcars) + y <- mtcars$mpg + x <- as.matrix(mtcars[, -1]) + dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1) + cv <- xgb.cv( + data = dm, + params = list( + tree_method = "hist", + multi_strategy = "multi_output_tree", + objective = "reg:squarederror", + nthread = n_threads + ), + nrounds = 5, + nfold = 3, + prediction = TRUE, + verbose = 0 + ) + expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 2)) }) diff --git a/R-package/tests/testthat/test_glm.R b/R-package/tests/testthat/test_glm.R index 349bcce8d..c089b4fe0 100644 --- a/R-package/tests/testthat/test_glm.R +++ b/R-package/tests/testthat/test_glm.R @@ -27,7 +27,7 @@ test_that("gblinear works", { expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL) bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic', - callbacks = list(cb.gblinear.history())) + callbacks = list(xgb.cb.gblinear.history())) expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL) h <- xgb.gblinear.history(bst) expect_equal(dim(h), c(n, ncol(dtrain) + 1)) @@ 
-44,7 +44,7 @@ test_that("gblinear works", { expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL) bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'thrifty', - top_k = 50, callbacks = list(cb.gblinear.history(sparse = TRUE))) + top_k = 50, callbacks = list(xgb.cb.gblinear.history(sparse = TRUE))) expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL) h <- xgb.gblinear.history(bst) expect_equal(dim(h), c(n, ncol(dtrain) + 1))
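
To see the refactored result structure end to end, here is a minimal sketch (not part of the patch itself; it assumes the post-refactor API above, the `agaricus.train` example data that ships with the package, and arbitrary parameter values):

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
params <- list(objective = "binary:logistic", eval_metric = "error",
               max_depth = 2, nthread = 1)

# Results produced by callbacks are now stored under each callback's
# 'cb_name': the early stopping callback stores its output under
# "early_stop", and the CV prediction callback under "cv_predict".
cv <- xgb.cv(
  params, dtrain, nfold = 3, nrounds = 20,
  early_stopping_rounds = 3, maximize = FALSE,
  prediction = TRUE, verbose = 0
)
cv$early_stop$best_iteration  # previously cv$best_iteration
head(cv$cv_predict$pred)      # previously cv$pred

The same naming convention applies to `xgb.train`: a callback's non-NULL result from `f_after_training` lands in the booster's R attributes under its `cb_name`, which is why the tests above read `attributes(bst)$evaluation_log` rather than a dedicated list element.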