[R] Work on Roxygen documentation (#10674)
parent 5db0803eb2
commit b949a4bf7b

@@ -1,9 +1,120 @@
.reserved_cb_names <- c("names", "class", "call", "params", "niter", "nfeatures", "folds")

#' @title XGBoost Callback Constructor
#' @description Constructor for defining the structure of callback functions that can be executed
#' XGBoost Callback Constructor
#'
#' Constructor for defining the structure of callback functions that can be executed
#' at different stages of model training (before / after training, before / after each boosting
#' iteration).
#'
#' @details
#' Arguments that will be passed to the supplied functions are as follows:
#' - env The same environment that is passed under argument `env`.
#'
#'   It may be modified by the functions in order to e.g. keep track of what happens
#'   across iterations or similar.
#'
#'   This environment is only used by the functions supplied to the callback, and will
#'   not be kept after the model fitting function terminates (see parameter `f_after_training`).
#'
#' - model The booster object when using [xgb.train()], or the folds when using [xgb.cv()].
#'
#'   For [xgb.cv()], folds are a list with a structure as follows:
#'   - `dtrain`: The training data for the fold (as an `xgb.DMatrix` object).
#'   - `bst`: The `xgb.Booster` object for the fold.
#'   - `evals`: A list containing two DMatrices, with names `train` and `test`
#'     (`test` is the held-out data for the fold).
#'   - `index`: The indices of the hold-out data for that fold (base-1 indexing),
#'     from which the `test` entry in `evals` was obtained.
#'
#'   This object should **not** be in-place modified in ways that conflict with the
#'   training (e.g. resetting the parameters for a training update in a way that resets
#'   the number of rounds to zero in order to overwrite rounds).
#'
#'   Note that any R attributes that are assigned to the booster during the callback functions
#'   will not be kept thereafter as the booster object variable is not re-assigned during
#'   training. It is however possible to set C-level attributes of the booster through
#'   [xgb.attr()] or [xgb.attributes()], which should remain available for the rest
#'   of the iterations and after the training is done.
#'
#'   For keeping variables across iterations, it's recommended to use `env` instead.
#' - data The data to which the model is being fit, as an `xgb.DMatrix` object.
#'
#'   Note that, for [xgb.cv()], this will be the full data, while data for the specific
#'   folds can be found in the `model` object.
#' - evals The evaluation data, as passed under argument `evals` to [xgb.train()].
#'
#'   For [xgb.cv()], this will always be `NULL`.
#' - begin_iteration Index of the first boosting iteration that will be executed (base-1 indexing).
#'
#'   This will typically be '1', but when using training continuation, depending on the
#'   parameters for updates, boosting rounds will be continued from where the previous
#'   model ended, in which case this will be larger than 1.
#'
#' - end_iteration Index of the last boosting iteration that will be executed
#'   (base-1 indexing, inclusive of this end).
#'
#'   It should match with argument `nrounds` passed to [xgb.train()] or [xgb.cv()].
#'
#'   Note that boosting might be interrupted before reaching this last iteration, for
#'   example by using the early stopping callback \link{xgb.cb.early.stop}.
#' - iteration Index of the iteration number that is being executed (first iteration
#'   will be the same as parameter `begin_iteration`, then next one will add +1, and so on).
#'
#' - iter_feval Evaluation metrics for `evals` that were supplied, either
#'   determined by the objective, or by parameter `feval`.
#'
#'   For [xgb.train()], this will be a named vector with one entry per element in
#'   `evals`, where the names are determined as 'evals name' + '-' + 'metric name' - for
#'   example, if `evals` contains an entry named "tr" and the metric is "rmse",
#'   this will be a one-element vector with name "tr-rmse".
#'
#'   For [xgb.cv()], this will be a 2d matrix with dimensions `[length(evals), nfolds]`,
#'   where the row names will follow the same naming logic as the one-dimensional vector
#'   that is passed in [xgb.train()].
#'
#'   Note that, internally, the built-in callbacks such as [xgb.cb.print.evaluation] summarize
#'   this table by calculating the row-wise means and standard deviations.
#'
#' - final_feval The evaluation results after the last boosting round is executed
#'   (same format as `iter_feval`, and will be the exact same input as passed under
#'   `iter_feval` to the last round that is executed during model fitting).
#'
#' - prev_cb_res Result from a previous run of a callback sharing the same name
#'   (as given by parameter `cb_name`) when conducting training continuation, if there
#'   was any in the booster R attributes.
#'
#'   Sometimes, one might want to append the new results to the previous one, and this will
#'   be done automatically by the built-in callbacks such as [xgb.cb.evaluation.log],
#'   which will append the new rows to the previous table.
#'
#'   If no such previous callback result is available (which will never be the case when fitting
#'   a model from scratch instead of updating an existing model), this will be `NULL`.
#'
#'   For [xgb.cv()], which doesn't support training continuation, this will always be `NULL`.
#'
#' The following names (`cb_name` values) are reserved for internal callbacks:
#' - print_evaluation
#' - evaluation_log
#' - reset_parameters
#' - early_stop
#' - save_model
#' - cv_predict
#' - gblinear_history
#'
#' The following names are reserved for other non-callback attributes:
#' - names
#' - class
#' - call
#' - params
#' - niter
#' - nfeatures
#' - folds
#'
#' When using the built-in early stopping callback ([xgb.cb.early.stop]), said callback
#' will always be executed before the others, as it sets some booster C-level attributes
#' that other callbacks might also use. Otherwise, the order of execution will match with
#' the order in which the callbacks are passed to the model fitting function.
#'
#' @param cb_name Name for the callback.
#'
#' If the callback produces some non-NULL result (from executing the function passed under
@@ -36,137 +147,20 @@
#' @param f_after_training A function that will be executed after training is finished.
#'
#' This function can optionally output something non-NULL, which will become part of the R
#' attributes of the booster (assuming one passes `keep_extra_attributes=TRUE` to \link{xgb.train})
#' under the name supplied for parameter `cb_name` in the case of \link{xgb.train}; or a part
#' of the named elements in the result of \link{xgb.cv}.
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' @details Arguments that will be passed to the supplied functions are as follows:\itemize{
#' attributes of the booster (assuming one passes `keep_extra_attributes=TRUE` to [xgb.train()])
#' under the name supplied for parameter `cb_name` in the case of [xgb.train()]; or a part
#' of the named elements in the result of [xgb.cv()].
#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].
#'
#' \item env The same environment that is passed under argument `env`.
#'
#' It may be modified by the functions in order to e.g. keep tracking of what happens
#' across iterations or similar.
#'
#' This environment is only used by the functions supplied to the callback, and will
#' not be kept after the model fitting function terminates (see parameter `f_after_training`).
#'
#' \item model The booster object when using \link{xgb.train}, or the folds when using
#' \link{xgb.cv}.
#'
#' For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
#' \item `dtrain`: The training data for the fold (as an `xgb.DMatrix` object).
#' \item `bst`: The `xgb.Booster` object for the fold.
#' \item `evals`: A list containing two DMatrices, with names `train` and `test`
#' (`test` is the held-out data for the fold).
#' \item `index`: The indices of the hold-out data for that fold (base-1 indexing),
#' from which the `test` entry in `evals` was obtained.
#' }
#'
#' This object should \bold{not} be in-place modified in ways that conflict with the
#' training (e.g. resetting the parameters for a training update in a way that resets
#' the number of rounds to zero in order to overwrite rounds).
#'
#' Note that any R attributes that are assigned to the booster during the callback functions,
#' will not be kept thereafter as the booster object variable is not re-assigned during
#' training. It is however possible to set C-level attributes of the booster through
#' \link{xgb.attr} or \link{xgb.attributes}, which should remain available for the rest
#' of the iterations and after the training is done.
#'
#' For keeping variables across iterations, it's recommended to use `env` instead.
#' \item data The data to which the model is being fit, as an `xgb.DMatrix` object.
#'
#' Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
#' folds can be found in the `model` object.
#'
#' \item evals The evaluation data, as passed under argument `evals` to
#' \link{xgb.train}.
#'
#' For \link{xgb.cv}, this will always be `NULL`.
#'
#' \item begin_iteration Index of the first boosting iteration that will be executed
#' (base-1 indexing).
#'
#' This will typically be '1', but when using training continuation, depending on the
#' parameters for updates, boosting rounds will be continued from where the previous
#' model ended, in which case this will be larger than 1.
#'
#' \item end_iteration Index of the last boosting iteration that will be executed
#' (base-1 indexing, inclusive of this end).
#'
#' It should match with argument `nrounds` passed to \link{xgb.train} or \link{xgb.cv}.
#'
#' Note that boosting might be interrupted before reaching this last iteration, for
#' example by using the early stopping callback \link{xgb.cb.early.stop}.
#'
#' \item iteration Index of the iteration number that is being executed (first iteration
#' will be the same as parameter `begin_iteration`, then next one will add +1, and so on).
#'
#' \item iter_feval Evaluation metrics for `evals` that were supplied, either
#' determined by the objective, or by parameter `feval`.
#'
#' For \link{xgb.train}, this will be a named vector with one entry per element in
#' `evals`, where the names are determined as 'evals name' + '-' + 'metric name' - for
#' example, if `evals` contains an entry named "tr" and the metric is "rmse",
#' this will be a one-element vector with name "tr-rmse".
#'
#' For \link{xgb.cv}, this will be a 2d matrix with dimensions `[length(evals), nfolds]`,
#' where the row names will follow the same naming logic as the one-dimensional vector
#' that is passed in \link{xgb.train}.
#'
#' Note that, internally, the built-in callbacks such as \link{xgb.cb.print.evaluation} summarize
#' this table by calculating the row-wise means and standard deviations.
#'
#' \item final_feval The evaluation results after the last boosting round is executed
#' (same format as `iter_feval`, and will be the exact same input as passed under
#' `iter_feval` to the last round that is executed during model fitting).
#'
#' \item prev_cb_res Result from a previous run of a callback sharing the same name
#' (as given by parameter `cb_name`) when conducting training continuation, if there
#' was any in the booster R attributes.
#'
#' Sometimes, one might want to append the new results to the previous one, and this will
#' be done automatically by the built-in callbacks such as \link{xgb.cb.evaluation.log},
#' which will append the new rows to the previous table.
#'
#' If no such previous callback result is available (which it never will when fitting
#' a model from start instead of updating an existing model), this will be `NULL`.
#'
#' For \link{xgb.cv}, which doesn't support training continuation, this will always be `NULL`.
#' }
#'
#' The following names (`cb_name` values) are reserved for internal callbacks:\itemize{
#' \item print_evaluation
#' \item evaluation_log
#' \item reset_parameters
#' \item early_stop
#' \item save_model
#' \item cv_predict
#' \item gblinear_history
#' }
#'
#' The following names are reserved for other non-callback attributes:\itemize{
#' \item names
#' \item class
#' \item call
#' \item params
#' \item niter
#' \item nfeatures
#' \item folds
#' }
#'
#' When using the built-in early stopping callback (\link{xgb.cb.early.stop}), said callback
#' will always be executed before the others, as it sets some booster C-level attributes
#' that other callbacks might also use. Otherwise, the order of execution will match with
#' the order in which the callbacks are passed to the model fitting function.
#' @seealso Built-in callbacks:\itemize{
#' \item \link{xgb.cb.print.evaluation}
#' \item \link{xgb.cb.evaluation.log}
#' \item \link{xgb.cb.reset.parameters}
#' \item \link{xgb.cb.early.stop}
#' \item \link{xgb.cb.save.model}
#' \item \link{xgb.cb.cv.predict}
#' \item \link{xgb.cb.gblinear.history}
#' }
#' @seealso Built-in callbacks:
#' - [xgb.cb.print.evaluation]
#' - [xgb.cb.evaluation.log]
#' - [xgb.cb.reset.parameters]
#' - [xgb.cb.early.stop]
#' - [xgb.cb.save.model]
#' - [xgb.cb.cv.predict]
#' - [xgb.cb.gblinear.history]
#'
#' @examples
#' # Example constructing a custom callback that calculates
#' # squared error on the training data (no separate test set),
@@ -203,8 +197,10 @@
#' )
#'
#' data(mtcars)
#'
#' y <- mtcars$mpg
#' x <- as.matrix(mtcars[, -1])
#'
#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
#' model <- xgb.train(
#'   data = dm,
@@ -407,16 +403,18 @@ xgb.Callback <- function(
  return(paste0(iter, res))
}
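
# A hedged sketch of how the constructor documented above can be used: a custom
# callback that records training-set RMSE into `env` at each iteration and
# returns it from `f_after_training`, so it becomes an R attribute of the
# booster under `cb_name`. The parameter names and callback-function argument
# lists follow the description in this documentation; treat the exact
# signatures (and the stop-signal return value) as assumptions.
library(xgboost)
data(mtcars)
dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
rmse_logger <- xgb.Callback(
  cb_name = "rmse_log",
  f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
    # 'env' persists across this callback's functions for the training run only
    env$rmse <- rep(NA_real_, end_iteration - begin_iteration + 1)
  },
  f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
    pred <- predict(model, data)
    env$rmse[iteration] <- sqrt(mean((pred - getinfo(data, "label"))^2))
    FALSE  # assumption: a TRUE return value would signal training to stop
  },
  f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
    env$rmse  # becomes attributes(model)$rmse_log, per 'cb_name'
  }
)
model <- xgb.train(
  data = dm,
  params = list(objective = "reg:squarederror", nthread = 1),
  nrounds = 20,
  keep_extra_attributes = TRUE,  # per the note above, keeps callback results as R attributes
  callbacks = list(rmse_logger)
)
plot(attributes(model)$rmse_log, type = "l")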

#' @title Callback for printing the result of evaluation
#' @param period results would be printed every number of periods
#' @param showsd whether standard deviations should be printed (when available)
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' Callback for printing the result of evaluation
#'
#' @description
#' The callback function prints the result of evaluation at every \code{period} iterations.
#' The callback function prints the result of evaluation at every `period` iterations.
#' The initial and the last iteration's evaluations are always printed.
#'
#' Does not leave any attribute in the booster (see \link{xgb.cb.evaluation.log} for that).
#' @seealso \link{xgb.Callback}
#' Does not leave any attribute in the booster (see [xgb.cb.evaluation.log] for that).
#'
#' @param period Results will be printed every `period` iterations.
#' @param showsd Whether standard deviations should be printed (when available).
#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].
#' @seealso [xgb.Callback]
#' @export
xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
  if (length(period) != 1 || period != floor(period) || period < 1) {
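
# Hedged usage sketch for the callback above: print evaluation results every
# 10 rounds while fitting a small regression model (the data setup mirrors
# the mtcars example earlier in this file and is illustrative only).
data(mtcars)
dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
model <- xgb.train(
  data = dm,
  params = list(objective = "reg:squarederror", nthread = 1),
  nrounds = 50,
  evals = list(train = dm),
  callbacks = list(xgb.cb.print.evaluation(period = 10))
)
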
@@ -450,14 +448,16 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
  )
}

#' @title Callback for logging the evaluation history
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' Callback for logging the evaluation history
#'
#' @details This callback creates a table with per-iteration evaluation metrics (see parameters
#' `evals` and `feval` in \link{xgb.train}).
#' @details
#' This callback creates a table with per-iteration evaluation metrics (see parameters
#' `evals` and `feval` in [xgb.train()]).
#'
#' Note: in the column names of the final data.table, the dash '-' character is replaced with
#' the underscore '_' in order to make the column names more like regular R identifiers.
#' @seealso \link{xgb.cb.print.evaluation}
#'
#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].
#' @seealso [xgb.cb.print.evaluation]
#' @export
xgb.cb.evaluation.log <- function() {
  xgb.Callback(
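
# Hedged sketch: with the logging callback, per-iteration metrics end up as a
# data.table in the booster's R attributes; note the 'train-rmse' ->
# 'train_rmse' column renaming described above. Data setup is illustrative.
data(mtcars)
dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
model <- xgb.train(
  data = dm,
  params = list(objective = "reg:squarederror", nthread = 1),
  nrounds = 10,
  evals = list(train = dm),
  callbacks = list(xgb.cb.evaluation.log())
)
head(attributes(model)$evaluation_log)  # columns: iter, train_rmse
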
@@ -517,20 +517,22 @@ xgb.cb.evaluation.log <- function() {
  )
}

#' @title Callback for resetting the booster's parameters at each iteration.
#' @param new_params a list where each element corresponds to a parameter that needs to be reset.
#' Each element's value must be either a vector of values of length \code{nrounds}
#' to be set at each iteration,
#' or a function of two parameters \code{learning_rates(iteration, nrounds)}
#' which returns a new parameter value by using the current iteration number
#' and the total number of boosting rounds.
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' Callback for resetting booster parameters at each iteration
#'
#' @details
#' Note that when training is resumed from some previous model, and a function is used to
#' reset a parameter value, the \code{nrounds} argument in this function would be the
#' reset a parameter value, the `nrounds` argument in this function would be
#' the number of boosting rounds in the current training.
#'
#' Does not leave any attribute in the booster.
#'
#' @param new_params List of parameters that need to be reset.
#' Each element's value must be either a vector of values of length `nrounds`
#' to be set at each iteration,
#' or a function of two parameters `learning_rates(iteration, nrounds)`
#' which returns a new parameter value by using the current iteration number
#' and the total number of boosting rounds.
#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].
#' @export
xgb.cb.reset.parameters <- function(new_params) {
  stopifnot(is.list(new_params))
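
# Hedged sketch of the two accepted forms for 'new_params' described above:
# a per-iteration vector of length nrounds, or a function of (iteration, nrounds).
n_rounds <- 20
eta_by_vector <- xgb.cb.reset.parameters(
  list(eta = seq(0.3, 0.05, length.out = n_rounds))
)
eta_by_function <- xgb.cb.reset.parameters(
  list(eta = function(iteration, nrounds) 0.3 * 0.95^(iteration - 1))
)
# Either object can then be supplied under 'callbacks' to xgb.train() or xgb.cv().
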
@@ -583,39 +585,39 @@ xgb.cb.reset.parameters <- function(new_params) {
  )
}

#' @title Callback to activate early stopping
#' @param stopping_rounds The number of rounds with no improvement in
#' the evaluation metric in order to stop the training.
#' @param maximize Whether to maximize the evaluation metric.
#' @param metric_name The name of an evaluation column to use as a criteria for early
#' stopping. If not set, the last column would be used.
#' Let's say the test data in \code{evals} was labelled as \code{dtest},
#' and one wants to use the AUC in test data for early stopping regardless of where
#' it is in the \code{evals}, then one of the following would need to be set:
#' \code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
#' All dash '-' characters in metric names are considered equivalent to '_'.
#' @param verbose Whether to print the early stopping information.
#' @param keep_all_iter Whether to keep all of the boosting rounds that were produced
#' in the resulting object. If passing `FALSE`, will only keep the boosting rounds
#' up to the detected best iteration, discarding the ones that come after.
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' Callback to activate early stopping
#'
#' @description
#' This callback function determines the condition for early stopping.
#'
#' The following attributes are assigned to the booster's object:
#' \itemize{
#' \item \code{best_score} the evaluation score at the best iteration
#' \item \code{best_iteration} at which boosting iteration the best score has occurred
#' - `best_score` the evaluation score at the best iteration
#' - `best_iteration` at which boosting iteration the best score has occurred
#'   (0-based index for interoperability of binary models)
#' }
#'
#' The same values are also stored as R attributes as a result of the callback, plus an additional
#' attribute `stopped_by_max_rounds` which indicates whether an early stopping by the `stopping_rounds`
#' condition occurred. Note that the `best_iteration` that is stored under R attributes will follow
#' base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
#' through \link{xgb.attr} or \link{xgb.attributes}.
#' through [xgb.attr()] or [xgb.attributes()].
#'
#' At least one dataset is required in `evals` for early stopping to work.
#'
#' @param stopping_rounds The number of rounds with no improvement in
#' the evaluation metric in order to stop the training.
#' @param maximize Whether to maximize the evaluation metric.
#' @param metric_name The name of an evaluation column to use as a criterion for early
#' stopping. If not set, the last column would be used.
#' Let's say the test data in `evals` was labelled as `dtest`,
#' and one wants to use the AUC in test data for early stopping regardless of where
#' it is in the `evals`, then one of the following would need to be set:
#' `metric_name = 'dtest-auc'` or `metric_name = 'dtest_auc'`.
#' All dash '-' characters in metric names are considered equivalent to '_'.
#' @param verbose Whether to print the early stopping information.
#' @param keep_all_iter Whether to keep all of the boosting rounds that were produced
#' in the resulting object. If passing `FALSE`, will only keep the boosting rounds
#' up to the detected best iteration, discarding the ones that come after.
#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].
#' @export
xgb.cb.early.stop <- function(
  stopping_rounds,
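
# Hedged sketch: early stopping on 'dtest-auc' (the metric_name form described
# above), then reading back both the C-level and the R-level best iteration.
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label, nthread = 1)
model <- xgb.train(
  data = dtrain,
  params = list(objective = "binary:logistic", eval_metric = "auc", nthread = 1),
  nrounds = 100,
  evals = list(dtest = dtest),
  callbacks = list(
    xgb.cb.early.stop(stopping_rounds = 5, maximize = TRUE, metric_name = "dtest-auc")
  )
)
xgb.attr(model, "best_iteration")   # C-level attribute, base-0 indexing
attributes(model)$best_iteration    # R attribute, base-1 indexing (see above)
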
@@ -771,21 +773,22 @@ xgb.cb.early.stop <- function(
  xgb.save(model, save_name)
}

#' @title Callback for saving a model file.
#' @param save_period Save the model to disk after every
#' \code{save_period} iterations; 0 means save the model at the end.
#' @param save_name The name or path for the saved model file.
#' It can contain a \code{\link[base]{sprintf}} formatting specifier
#' to include the integer iteration number in the file name.
#' E.g., with \code{save_name} = 'xgboost_%04d.model',
#' the file saved at iteration 50 would be named "xgboost_0050.model".
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train},
#' but \bold{not} to \link{xgb.cv}.
#' Callback for saving a model file
#'
#' @description
#' This callback function allows saving an xgb-model file, either periodically
#' after each \code{save_period}'s or at the end.
#' after every `save_period` iterations, or at the end.
#'
#' Does not leave any attribute in the booster.
#'
#' @param save_period Save the model to disk after every `save_period` iterations;
#' 0 means save the model at the end.
#' @param save_name The name or path for the saved model file.
#' It can contain a [sprintf()] formatting specifier to include the integer
#' iteration number in the file name. E.g., with `save_name = 'xgboost_%04d.model'`,
#' the file saved at iteration 50 would be named "xgboost_0050.model".
#' @return An `xgb.Callback` object, which can be passed to [xgb.train()],
#' but **not** to [xgb.cv()].
#' @export
xgb.cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
  if (save_period < 0) {
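
# Hedged sketch: checkpoint every 10 rounds using the sprintf-style name
# described above, writing e.g. 'xgboost_0010.ubj', 'xgboost_0020.ubj', ...
ckpt_cb <- xgb.cb.save.model(
  save_period = 10,
  save_name = file.path(tempdir(), "xgboost_%04d.ubj")
)
# Pass under 'callbacks' to xgb.train(); per the note above, this callback is
# not supported for xgb.cv().
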
@@ -817,24 +820,26 @@ xgb.cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
  )
}

#' @title Callback for returning cross-validation based predictions.
#' @param save_models A flag for whether to save the folds' models.
#' @param outputmargin Whether to save margin predictions (same effect as passing this
#' parameter to \link{predict.xgb.Booster}).
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.cv},
#' but \bold{not} to \link{xgb.train}.
#' @description
#' Callback for returning cross-validation based predictions
#'
#' This callback function saves predictions for all of the test folds,
#' and also allows saving the folds' models.
#'
#' @details
#' Predictions are saved inside of the \code{pred} element, which is either a vector or a matrix,
#' Predictions are saved inside of the `pred` element, which is either a vector or a matrix,
#' depending on the number of prediction outputs per data row. The order of predictions corresponds
#' to the order of rows in the original dataset. Note that when a custom \code{folds} list is
#' provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a
#' to the order of rows in the original dataset. Note that when a custom `folds` list is
#' provided in [xgb.cv()], the predictions would only be returned properly when this list is a
#' non-overlapping list of k sets of indices, as in a standard k-fold CV. The predictions would not be
#' meaningful when user-provided folds have overlapping indices as in, e.g., random sampling splits.
#' When some of the indices in the training dataset are not included into user-provided \code{folds},
#' their prediction value would be \code{NA}.
#' When some of the indices in the training dataset are not included in user-provided `folds`,
#' their prediction value would be `NA`.
#'
#' @param save_models A flag for whether to save the folds' models.
#' @param outputmargin Whether to save margin predictions (same effect as passing this
#' parameter to [predict.xgb.Booster]).
#' @return An `xgb.Callback` object, which can be passed to [xgb.cv()],
#' but **not** to [xgb.train()].
#' @export
xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
  xgb.Callback(
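
# Hedged sketch: collect out-of-fold predictions from a standard k-fold run;
# 'pred' lines up with the rows of the input DMatrix, as described above.
data(mtcars)
dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
res <- xgb.cv(
  data = dm,
  params = list(objective = "reg:squarederror", nthread = 1),
  nfold = 5,
  nrounds = 10,
  callbacks = list(xgb.cb.cv.predict(save_models = FALSE))
)
head(res$pred)
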
@@ -903,19 +908,15 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
  return(coefs)
}

#' @title Callback for collecting coefficients history of a gblinear booster
#' @param sparse when set to `FALSE`/`TRUE`, a dense/sparse matrix is used to store the result.
#' Sparse format is useful when one expects only a subset of coefficients to be non-zero,
#' when using the "thrifty" feature selector with fairly small number of top features
#' selected per iteration.
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' Callback for collecting coefficients history of a gblinear booster
#'
#' @details
#' To keep things fast and simple, gblinear booster does not internally store the history of linear
#' model coefficients at each boosting iteration. This callback provides a workaround for storing
#' the coefficients' path, by extracting them after each training iteration.
#'
#' This callback will construct a matrix where rows are boosting iterations and columns are
#' feature coefficients (same order as when calling \link{coef.xgb.Booster}, with the intercept
#' feature coefficients (same order as when calling [coef.xgb.Booster], with the intercept
#' corresponding to the first column).
#'
#' When there is more than one coefficient per feature (e.g. multi-class classification),
@@ -928,13 +929,18 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
#' one coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'
#' (so e.g. column 'c1' for class '0' will be named 'c1:0').
#'
#' With \code{xgb.train}, the output is either a dense or a sparse matrix.
#' With \code{xgb.cv}, it is a list (one element per each fold) of such
#' matrices.
#' With [xgb.train()], the output is either a dense or a sparse matrix.
#' With [xgb.cv()], it is a list (one element per each fold) of such matrices.
#'
#' Function \link{xgb.gblinear.history} provides an easy way to retrieve the
#' Function [xgb.gblinear.history] provides an easy way to retrieve the
#' outputs from this callback.
#' @seealso \link{xgb.gblinear.history}, \link{coef.xgb.Booster}.
#'
#' @param sparse When set to `FALSE`/`TRUE`, a dense/sparse matrix is used to store the result.
#' Sparse format is useful when one expects only a subset of coefficients to be non-zero,
#' when using the "thrifty" feature selector with a fairly small number of top features
#' selected per iteration.
#' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()].
#' @seealso [xgb.gblinear.history], [coef.xgb.Booster].
#' @examples
#' #### Binary classification:
#'
@@ -944,57 +950,109 @@
#'
#' # In the iris dataset, it is hard to linearly separate Versicolor class from the rest
#' # without considering the 2nd order interactions:
#' x <- model.matrix(Species ~ .^2, iris)[,-1]
#' x <- model.matrix(Species ~ .^2, iris)[, -1]
#' colnames(x)
#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
#' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
#' dtrain <- xgb.DMatrix(
#'   scale(x),
#'   label = 1 * (iris$Species == "versicolor"),
#'   nthread = nthread
#' )
#' param <- list(
#'   booster = "gblinear",
#'   objective = "reg:logistic",
#'   eval_metric = "auc",
#'   lambda = 0.0003,
#'   alpha = 0.0003,
#'   nthread = nthread
#' )
#'
#' # For 'shotgun', which is a default linear updater, using high eta values may result in
#' # unstable behaviour in some datasets. With this simple dataset, however, the high learning
#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
#' # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 1.,
#'                  callbacks = list(xgb.cb.gblinear.history()))
#' bst <- xgb.train(
#'   param,
#'   dtrain,
#'   list(tr = dtrain),
#'   nrounds = 200,
#'   eta = 1.,
#'   callbacks = list(xgb.cb.gblinear.history())
#' )
#'
#' # Extract the coefficients' path and plot them vs boosting iteration number:
#' coef_path <- xgb.gblinear.history(bst)
#' matplot(coef_path, type = 'l')
#' matplot(coef_path, type = "l")
#'
#' # With the deterministic coordinate descent updater, it is safer to use higher learning rates.
#' # Will try the classical componentwise boosting which selects a single best feature per round:
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8,
#'                  updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
#'                  callbacks = list(xgb.cb.gblinear.history()))
#' matplot(xgb.gblinear.history(bst), type = 'l')
#' bst <- xgb.train(
#'   param,
#'   dtrain,
#'   list(tr = dtrain),
#'   nrounds = 200,
#'   eta = 0.8,
#'   updater = "coord_descent",
#'   feature_selector = "thrifty",
#'   top_k = 1,
#'   callbacks = list(xgb.cb.gblinear.history())
#' )
#' matplot(xgb.gblinear.history(bst), type = "l")
#' # Componentwise boosting is known to have similar effect to Lasso regularization.
#' # Try experimenting with various values of top_k, eta, nrounds,
#' # as well as different feature_selectors.
#'
#' # For xgb.cv:
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
#'               callbacks = list(xgb.cb.gblinear.history()))
#' bst <- xgb.cv(
#'   param,
#'   dtrain,
#'   nfold = 5,
#'   nrounds = 100,
#'   eta = 0.8,
#'   callbacks = list(xgb.cb.gblinear.history())
#' )
#' # coefficients in the CV fold #3
#' matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
#' matplot(xgb.gblinear.history(bst)[[3]], type = "l")
#'
#'
#' #### Multiclass classification:
#' #
#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
#' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
#'               lambda = 0.0003, alpha = 0.0003, nthread = nthread)
#'
#' param <- list(
#'   booster = "gblinear",
#'   objective = "multi:softprob",
#'   num_class = 3,
#'   lambda = 0.0003,
#'   alpha = 0.0003,
#'   nthread = nthread
#' )
#'
#' # For the default linear updater 'shotgun' it sometimes is helpful
#' # to use smaller eta to reduce instability
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
#'                  callbacks = list(xgb.cb.gblinear.history()))
#' bst <- xgb.train(
#'   param,
#'   dtrain,
#'   list(tr = dtrain),
#'   nrounds = 50,
#'   eta = 0.5,
#'   callbacks = list(xgb.cb.gblinear.history())
#' )
#'
#' # Will plot the coefficient paths separately for each class:
#' matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l')
#' matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l')
#' matplot(xgb.gblinear.history(bst, class_index = 2), type = 'l')
#' matplot(xgb.gblinear.history(bst, class_index = 0), type = "l")
#' matplot(xgb.gblinear.history(bst, class_index = 1), type = "l")
#' matplot(xgb.gblinear.history(bst, class_index = 2), type = "l")
#'
#' # CV:
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5,
#'               callbacks = list(xgb.cb.gblinear.history(FALSE)))
#' bst <- xgb.cv(
#'   param,
#'   dtrain,
#'   nfold = 5,
#'   nrounds = 70,
#'   eta = 0.5,
#'   callbacks = list(xgb.cb.gblinear.history(FALSE))
#' )
#' # 1st fold of 1st class
#' matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l')
#' matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = "l")
#'
#' @export
xgb.cb.gblinear.history <- function(sparse = FALSE) {
@@ -1097,28 +1155,31 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
  )
}

#' @title Extract gblinear coefficients history.
#' @description A helper function to extract the matrix of linear coefficients' history
#' from a gblinear model created while using the \link{xgb.cb.gblinear.history}
#' callback (which must be added manually as by default it's not used).
#' @details Note that this is an R-specific function that relies on R attributes that
#' are not saved when using xgboost's own serialization functions like \link{xgb.load}
#' or \link{xgb.load.raw}.
#' Extract gblinear coefficients history
#'
#' A helper function to extract the matrix of linear coefficients' history
#' from a gblinear model created while using the [xgb.cb.gblinear.history]
#' callback (which must be added manually as by default it is not used).
#'
#' @details
#' Note that this is an R-specific function that relies on R attributes that
#' are not saved when using XGBoost's own serialization functions like [xgb.load()]
#' or [xgb.load.raw()].
#'
#' In order for a serialized model to be accepted by this function, one must use R
#' serializers such as \link{saveRDS}.
#' @param model either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
#' using the \link{xgb.cb.gblinear.history} callback, but \bold{not} a booster
#' loaded from \link{xgb.load} or \link{xgb.load.raw}.
#' serializers such as [saveRDS()].
#' @param model Either an `xgb.Booster` or a result of [xgb.cv()], trained
#' using the [xgb.cb.gblinear.history] callback, but **not** a booster
#' loaded from [xgb.load()] or [xgb.load.raw()].
#' @param class_index Zero-based class index to extract the coefficients for only that
#' specific class in a multinomial multiclass model. When it is NULL, all the
#' specific class in a multinomial multiclass model. When it is `NULL`, all the
#' coefficients are returned. Has no effect in non-multiclass models.
#'
#' @return
#' For an \link{xgb.train} result, a matrix (either dense or sparse) with the columns
#' For an [xgb.train()] result, a matrix (either dense or sparse) with the columns
#' corresponding to iteration's coefficients and the rows corresponding to boosting iterations.
#'
#' For an \link{xgb.cv} result, a list of such matrices is returned with the elements
#' For an [xgb.cv()] result, a list of such matrices is returned with the elements
#' corresponding to CV folds.
#'
#' When there is more than one coefficient per feature (e.g. multi-class classification)
@@ -1126,7 +1187,7 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
#' the result will be reshaped into a vector where coefficients are arranged first by features and
#' then by class (e.g. first 1 through N coefficients will be for the first class, then
#' coefficients N+1 through 2N for the second class, and so on).
#' @seealso \link{xgb.cb.gblinear.history}, \link{coef.xgb.Booster}.
#' @seealso [xgb.cb.gblinear.history], [coef.xgb.Booster].
#' @export
xgb.gblinear.history <- function(model, class_index = NULL) {
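
# Hedged sketch: train a tiny multiclass gblinear model with the history
# callback, then pull per-class coefficient paths out of the stored history
# (shapes follow the @return description above; data setup is illustrative).
data(iris)
x <- scale(model.matrix(Species ~ . - 1, iris))
dm <- xgb.DMatrix(x, label = as.numeric(iris$Species) - 1, nthread = 1)
bst <- xgb.train(
  data = dm,
  params = list(booster = "gblinear", objective = "multi:softprob",
                num_class = 3, nthread = 1),
  nrounds = 10,
  callbacks = list(xgb.cb.gblinear.history())
)
hist_all <- xgb.gblinear.history(bst)                  # nrounds x (ncoef * 3)
hist_c0 <- xgb.gblinear.history(bst, class_index = 0)  # nrounds x ncoef
dim(hist_c0)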

@@ -410,7 +410,7 @@ xgb.createFolds <- function(y, k) {
#' At this time, some of the parameter names were changed in order to make the code style more uniform.
#' The deprecated parameters would be removed in the next release.
#'
#' To see all the current deprecated and new parameters, check the \code{xgboost:::depr_par_lut} table.
#' To see all the current deprecated and new parameters, check the `xgboost:::depr_par_lut` table.
#'
#' A deprecation warning is shown when any of the deprecated parameters is used in a call.
#' An additional warning is shown when there was a partial match to a deprecated parameter
@@ -419,70 +419,79 @@ xgb.createFolds <- function(y, k) {
#' @name xgboost-deprecated
NULL

#' @title Model Serialization and Compatibility
#' @description
#' Model Serialization and Compatibility
#'
#' @description
#' When it comes to serializing XGBoost models, it's possible to use R serializers such as
#' \link{save} or \link{saveRDS} to serialize an XGBoost R model, but XGBoost also provides
#' [save()] or [saveRDS()] to serialize an XGBoost R model, but XGBoost also provides
#' its own serializers with better compatibility guarantees, which allow loading
#' said models in other language bindings of XGBoost.
#'
#' Note that an `xgb.Booster` object, outside of its core components, might also keep:\itemize{
#' \item Additional model configuration (accessible through \link{xgb.config}),
#' which includes model fitting parameters like `max_depth` and runtime parameters like `nthread`.
#' Note that an `xgb.Booster` object, outside of its core components, might also keep:
#' - Additional model configuration (accessible through [xgb.config()]), which includes
#'   model fitting parameters like `max_depth` and runtime parameters like `nthread`.
#'   These are not necessarily useful for prediction/importance/plotting.
#' \item Additional R-specific attributes - e.g. results of callbacks, such as evaluation logs,
#' which are kept as a `data.table` object, accessible through `attributes(model)$evaluation_log`
#' if present.
#' }
#' - Additional R-specific attributes - e.g. results of callbacks, such as evaluation logs,
#'   which are kept as a `data.table` object, accessible through
#'   `attributes(model)$evaluation_log` if present.
#'
#' The first one (configurations) does not have the same compatibility guarantees as
#' the model itself, including attributes that are set and accessed through \link{xgb.attributes} - that is, such configuration
#' might be lost after loading the booster in a different XGBoost version, regardless of the
#' serializer that was used. These are saved when using \link{saveRDS}, but will be discarded
#' if loaded into an incompatible XGBoost version. They are not saved when using XGBoost's
#' serializers from its public interface including \link{xgb.save} and \link{xgb.save.raw}.
#' the model itself, including attributes that are set and accessed through
#' [xgb.attributes()] - that is, such configuration might be lost after loading the
#' booster in a different XGBoost version, regardless of the serializer that was used.
#' These are saved when using [saveRDS()], but will be discarded if loaded into an
#' incompatible XGBoost version. They are not saved when using XGBoost's
#' serializers from its public interface including [xgb.save()] and [xgb.save.raw()].
#'
#' The second ones (R attributes) are not part of the standard XGBoost model structure, and thus are
#' not saved when using XGBoost's own serializers. These attributes are only used for informational
#' purposes, such as keeping track of evaluation metrics as the model was fit, or saving the R
#' call that produced the model, but are otherwise not used for prediction / importance / plotting / etc.
#' The second ones (R attributes) are not part of the standard XGBoost model structure,
#' and thus are not saved when using XGBoost's own serializers. These attributes are
#' only used for informational purposes, such as keeping track of evaluation metrics as
#' the model was fit, or saving the R call that produced the model, but are otherwise
#' not used for prediction / importance / plotting / etc.
#' These R attributes are only preserved when using R's serializers.
#'
#' Note that XGBoost models in R starting from version `2.1.0` and onwards, and XGBoost models
#' before version `2.1.0`; have a very different R object structure and are incompatible with
#' each other. Hence, models that were saved with R serializers like `saveRDS` or `save` before
#' version `2.1.0` will not work with later `xgboost` versions and vice versa. Be aware that
#' the structure of R model objects could in theory change again in the future, so XGBoost's serializers
#' Note that XGBoost models in R starting from version `2.1.0` and onwards, and
#' XGBoost models before version `2.1.0`, have a very different R object structure and
#' are incompatible with each other. Hence, models that were saved with R serializers
#' like [saveRDS()] or [save()] before version `2.1.0` will not work with later
#' `xgboost` versions and vice versa. Be aware that the structure of R model objects
#' could in theory change again in the future, so XGBoost's serializers
#' should be preferred for long-term storage.
#'
#' Furthermore, note that using the package `qs` for serialization will require version 0.26 or
#' higher of said package, and will have the same compatibility restrictions as R serializers.
#' Furthermore, note that using the package `qs` for serialization will require
#' version 0.26 or higher of said package, and will have the same compatibility
#' restrictions as R serializers.
#'
#' @details
#' Use \code{\link{xgb.save}} to save the XGBoost model as a stand-alone file. You may opt into
#' Use [xgb.save()] to save the XGBoost model as a stand-alone file. You may opt into
#' the JSON format by specifying the JSON extension. To read the model back, use
#' \code{\link{xgb.load}}.
#' [xgb.load()].
#'
#' Use \code{\link{xgb.save.raw}} to save the XGBoost model as a sequence (vector) of raw bytes
#' Use [xgb.save.raw()] to save the XGBoost model as a sequence (vector) of raw bytes
#' in a future-proof manner. Future releases of XGBoost will be able to read the raw bytes and
#' re-construct the corresponding model. To read the model back, use \code{\link{xgb.load.raw}}.
#' The \code{\link{xgb.save.raw}} function is useful if you'd like to persist the XGBoost model
#' re-construct the corresponding model. To read the model back, use [xgb.load.raw()].
#' The [xgb.save.raw()] function is useful if you would like to persist the XGBoost model
#' as part of another R object.
#'
#' Use \link{saveRDS} if you require the R-specific attributes that a booster might have, such
#' Use [saveRDS()] if you require the R-specific attributes that a booster might have, such
#' as evaluation logs, but note that future compatibility of such objects is outside XGBoost's
#' control as it relies on R's serialization format (see e.g. the details section in
#' \link{serialize} and \link{save} from base R).
#' [serialize] and [save()] from base R).
#'
#' For more details and explanation about model persistence and archival, consult the page
#' \url{https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html}.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' bst <- xgb.train(data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
#'                  max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
#'                  objective = "binary:logistic")
#' data(agaricus.train, package = "xgboost")
#'
#' bst <- xgb.train(
#'   data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
#'   max_depth = 2,
#'   eta = 1,
#'   nthread = 2,
#'   nrounds = 2,
#'   objective = "binary:logistic"
#' )
#'
#' # Save as a stand-alone file; load it with xgb.load()
#' fname <- file.path(tempdir(), "xgb_model.ubj")
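
# Hedged sketch contrasting the persistence routes described above. XGBoost's
# own serializer produces a forward-compatible payload but drops R attributes;
# R's serializer keeps them but ties the object to compatible package versions.
# ('bst' as fitted in the example block above.)
raw_bytes <- xgb.save.raw(bst)            # portable across XGBoost versions/bindings
bst_from_raw <- xgb.load.raw(raw_bytes)   # R attributes are not restored here
rds_path <- file.path(tempdir(), "bst.rds")
saveRDS(bst, rds_path)                    # keeps R attributes (e.g. evaluation logs)
bst_from_rds <- readRDS(rds_path)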

@@ -1,4 +1,4 @@
# Construct an internal xgboost Booster and get its current number of rounds.
# Construct an internal XGBoost Booster and get its current number of rounds.
# internal utility function
# Note: the number of rounds in the C booster gets reset to zero when changing
# key booster parameters like 'process_type=update', but in some cases, when
@@ -64,7 +64,7 @@ xgb.get.handle <- function(object) {
  if (inherits(object, "xgb.Booster")) {
    handle <- object$ptr
    if (is.null(handle) || !inherits(handle, "externalptr")) {
      stop("'xgb.Booster' object is corrupted or is from an incompatible xgboost version.")
      stop("'xgb.Booster' object is corrupted or is from an incompatible XGBoost version.")
    }
  } else {
    stop("argument must be an 'xgb.Booster' object.")
@@ -77,37 +77,38 @@ xgb.get.handle <- function(object) {

#' Predict method for XGBoost model
#'
#' Predict values on data based on xgboost model.
#' Predict values on data based on an XGBoost model.
#'
#' @param object Object of class `xgb.Booster`.
#' @param newdata Takes `data.frame`, `matrix`, `dgCMatrix`, `dgRMatrix`, `dsparseVector`,
#' local data file, or `xgb.DMatrix`.
#'
#' For single-row predictions on sparse data, it's recommended to use CSR format. If passing
#' For single-row predictions on sparse data, it is recommended to use CSR format. If passing
#' a sparse vector, it will take it as a row vector.
#'
#' Note that, for repeated predictions on the same data, one might want to create a DMatrix to
#' pass here instead of passing R types like matrices or data frames, as predictions will be
#' faster on DMatrix.
#'
#' If `newdata` is a `data.frame`, be aware that:\itemize{
#' \item Columns will be converted to numeric if they aren't already, which could potentially make
#' If `newdata` is a `data.frame`, be aware that:
#' - Columns will be converted to numeric if they aren't already, which could potentially make
#'   the operation slower than in an equivalent `matrix` object.
#' \item The order of the columns must match with that of the data from which the model was fitted
#' - The order of the columns must match with that of the data from which the model was fitted
#'   (i.e. columns will not be referenced by their names, just by their order in the data).
#' \item If the model was fitted to data with categorical columns, these columns must be of
#' - If the model was fitted to data with categorical columns, these columns must be of
#'   `factor` type here, and must use the same encoding (i.e. have the same levels).
#' \item If `newdata` contains any `factor` columns, they will be converted to base-0
#' - If `newdata` contains any `factor` columns, they will be converted to base-0
#'   encoding (same as during DMatrix creation) - hence, one should not pass a `factor`
#'   under a column which during training had a different type.
#' }
#' @param missing Float value that represents missing values in data (e.g., 0 or some other extreme value).
#' @param missing Float value that represents missing values in data
#' (e.g., 0 or some other extreme value).
#'
#' This parameter is not used when `newdata` is an `xgb.DMatrix` - in such cases, should pass
#' this as an argument to the DMatrix constructor instead.
#' @param outputmargin Whether the prediction should be returned in the form of original untransformed
#' sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for
#' logistic regression would return log-odds instead of probabilities.
#' This parameter is not used when `newdata` is an `xgb.DMatrix` - in such cases,
#' one should pass this as an argument to the DMatrix constructor instead.
#' @param outputmargin Whether the prediction should be returned in the form of
#' the original untransformed sum of predictions from boosting iterations' results.
#' E.g., setting `outputmargin = TRUE` for logistic regression would return log-odds
#' instead of probabilities.
#' @param predleaf Whether to predict per-tree leaf indices.
#' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
@@ -147,17 +148,16 @@ xgb.get.handle <- function(object) {
#'
#' Note that, if `newdata` is an `xgb.DMatrix` object, this argument will
#' be ignored as it needs to be added to the DMatrix instead (e.g. by passing it as
#' an argument in its constructor, or by calling \link{setinfo.xgb.DMatrix}).
#'
#' @param validate_features When `TRUE`, validate that the Booster's and newdata's feature_names
#' match (only applicable when both `object` and `newdata` have feature names).
#' an argument in its constructor, or by calling [setinfo.xgb.DMatrix()].
#' @param validate_features When `TRUE`, validate that the Booster's and newdata's
#' feature_names match (only applicable when both `object` and `newdata` have feature names).
#'
#' If the column names differ and `newdata` is not an `xgb.DMatrix`, will try to reorder
#' the columns in `newdata` to match with the booster's.
#'
#' If the booster has feature types and `newdata` is either an `xgb.DMatrix` or `data.frame`,
#' will additionally verify that categorical columns are of the correct type in `newdata`,
#' throwing an error if they do not match.
#' If the booster has feature types and `newdata` is either an `xgb.DMatrix` or
#' `data.frame`, will additionally verify that categorical columns are of the
#' correct type in `newdata`, throwing an error if they do not match.
#'
#' If passing `FALSE`, it is assumed that the feature names and types are the same,
#' and come in the same order as in the training data.
@@ -167,7 +167,6 @@ xgb.get.handle <- function(object) {
#' @param ... Not used.
#'
#' @details
#'
#' Note that `iterationrange` would currently do nothing for predictions from "gblinear",
#' since "gblinear" doesn't keep its boosting history.
#'
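
# Hedged sketch of the 'outputmargin' behavior described above: for a logistic
# objective, margins are log-odds, and the sigmoid of the margin recovers the
# probability output (data setup is illustrative).
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
model <- xgb.train(
  data = dtrain,
  params = list(objective = "binary:logistic", nthread = 1),
  nrounds = 5
)
prob <- predict(model, dtrain)                        # probabilities
marg <- predict(model, dtrain, outputmargin = TRUE)   # untransformed log-odds
all.equal(prob, 1 / (1 + exp(-marg)), tolerance = 1e-6)
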
|
||||
@ -589,11 +588,33 @@ validate.features <- function(bst, newdata) {
}


#' @title Accessors for serializable attributes of a model
#' Accessors for serializable attributes of a model
#'
#' @description These methods allow to manipulate the key-value attribute strings of an xgboost model.
#' These methods allow to manipulate the key-value attribute strings of an XGBoost model.
#'
#' @param object Object of class `xgb.Booster`. \bold{Will be modified in-place} when assigning to it.
#' @details
#' The primary purpose of XGBoost model attributes is to store some meta data about the model.
#' Note that they are a separate concept from the object attributes in R.
#' Specifically, they refer to key-value strings that can be attached to an XGBoost model,
#' stored together with the model's binary representation, and accessed later
#' (from R or any other interface).
#' In contrast, any R attribute assigned to an R object of `xgb.Booster` class
#' would not be saved by [xgb.save()] because an XGBoost model is an external memory object
#' and its serialization is handled externally.
#' Also, setting an attribute that has the same name as one of XGBoost's parameters wouldn't
#' change the value of that parameter for a model.
#' Use [xgb.parameters<-()] to set or change model parameters.
#'
#' The [xgb.attributes<-()] setter either updates the existing or adds one or several attributes,
#' but it doesn't delete the other existing attributes.
#'
#' Important: since this modifies the booster's C object, semantics for assignment here
#' will differ from R's, as any object reference to the same booster will be modified
#' too, while assignment of R attributes through `attributes(model)$<attr> <- <value>`
#' will follow the usual copy-on-write R semantics (see [xgb.copy.Booster()] for an
#' example of these behaviors).
#'
#' @param object Object of class `xgb.Booster`. **Will be modified in-place** when assigning to it.
#' @param name A non-empty character string specifying which attribute is to be accessed.
#' @param value For `xgb.attr<-`, a value of an attribute; for `xgb.attributes<-`,
#' it is a list (or an object coercible to a list) with the names of attributes to set
@ -601,29 +622,6 @@ validate.features <- function(bst, newdata) {
#' Non-character values are converted to character.
#' When an attribute value is not a scalar, only the first index is used.
#' Use `NULL` to remove an attribute.
#'
#' @details
#' The primary purpose of xgboost model attributes is to store some meta data about the model.
#' Note that they are a separate concept from the object attributes in R.
#' Specifically, they refer to key-value strings that can be attached to an xgboost model,
#' stored together with the model's binary representation, and accessed later
#' (from R or any other interface).
#' In contrast, any R attribute assigned to an R object of `xgb.Booster` class
#' would not be saved by [xgb.save()] because an xgboost model is an external memory object
#' and its serialization is handled externally.
#' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
#' change the value of that parameter for a model.
#' Use [xgb.parameters<-()] to set or change model parameters.
#'
#' The `xgb.attributes<-` setter either updates the existing or adds one or several attributes,
#' but it doesn't delete the other existing attributes.
#'
#' Important: since this modifies the booster's C object, semantics for assignment here
#' will differ from R's, as any object reference to the same booster will be modified
#' too, while assignment of R attributes through `attributes(model)$<attr> <- <value>`
#' will follow the usual copy-on-write R semantics (see \link{xgb.copy.Booster} for an
#' example of these behaviors).
#'
#' @return
#' - `xgb.attr()` returns either a string value of an attribute
#' or `NULL` if an attribute wasn't stored in a model.
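A hedged illustration of the in-place C-level semantics described above (a sketch, not part of the commit):

library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  params = list(objective = "binary:logistic", nthread = 1),
  nrounds = 2
)
xgb.attr(bst, "my_note") <- "fitted on agaricus"  # stored in the C object
alias <- bst                                      # plain assignment: same C object
xgb.attr(alias, "my_note")                        # "fitted on agaricus"
xgb.attr(bst, "my_note") <- NULL                  # removed for both references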
@ -720,15 +718,18 @@ xgb.attributes <- function(object) {
  return(object)
}

#' @title Accessors for model parameters as JSON string
#' @details Note that assignment is performed in-place on the booster C object, which unlike assignment
#' Accessors for model parameters as JSON string
#'
#' @details
#' Note that assignment is performed in-place on the booster C object, which unlike assignment
#' of R attributes, doesn't follow typical copy-on-write semantics for assignment - i.e. all references
#' to the same booster will also get updated.
#'
#' See \link{xgb.copy.Booster} for an example of this behavior.
#' @param object Object of class `xgb.Booster`. \bold{Will be modified in-place} when assigning to it.
#' @param value An R list.
#' @return `xgb.config` will return the parameters as an R list.
#' See [xgb.copy.Booster()] for an example of this behavior.
#'
#' @param object Object of class `xgb.Booster`. **Will be modified in-place** when assigning to it.
#' @param value A list.
#' @return Parameters as a list.
#' @examples
#' data(agaricus.train, package = "xgboost")
#'
@ -767,23 +768,27 @@ xgb.config <- function(object) {
  return(object)
}

#' @title Accessors for model parameters
#' @description Only the setter for xgboost parameters is currently implemented.
#' @details Just like \link{xgb.attr}, this function will make in-place modifications
#' Accessors for model parameters
#'
#' Only the setter for XGBoost parameters is currently implemented.
#'
#' @details
#' Just like [xgb.attr()], this function will make in-place modifications
#' on the booster object which do not follow typical R assignment semantics - that is,
#' all references to the same booster will also be updated, unlike assignment of R
#' attributes which follow copy-on-write semantics.
#'
#' See \link{xgb.copy.Booster} for an example of this behavior.
#' See [xgb.copy.Booster()] for an example of this behavior.
#'
#' Be aware that setting parameters of a fitted booster related to training continuation / updates
#' will reset its number of rounds indicator to zero.
#' @param object Object of class `xgb.Booster`. \bold{Will be modified in-place}.
#' @param object Object of class `xgb.Booster`. **Will be modified in-place**.
#' @param value A list (or an object coercible to a list) with the names of parameters to set
#' and the elements corresponding to parameter values.
#' @return The same booster `object`, which gets modified in-place.
#' @examples
#' data(agaricus.train, package = "xgboost")
#'
#' train <- agaricus.train
#'
#' bst <- xgb.train(
@ -859,11 +864,12 @@ setinfo.xgb.Booster <- function(object, name, info) {
  return(TRUE)
}

#' @title Get number of boosting in a fitted booster
#' Get number of boosting rounds in a fitted booster
#'
#' @param model,x A fitted `xgb.Booster` model.
#' @return The number of rounds saved in the model, as an integer.
#' @return The number of rounds saved in the model as an integer.
#' @details Note that setting booster parameters related to training
#' continuation / updates through \link{xgb.parameters<-} will reset the
#' continuation / updates through [xgb.parameters<-()] will reset the
#' number of rounds to zero.
#' @export
#' @rdname xgb.get.num.boosted.rounds
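A short hedged sketch of the accessor (using only functions named in this commit):

library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  params = list(objective = "binary:logistic", nthread = 1),
  nrounds = 5
)
xgb.get.num.boosted.rounds(bst)  # 5
length(bst)                      # same value via the length() method
# Per the note above, setting continuation-related parameters through
# xgb.parameters<- would reset this count to zero.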
@ -877,16 +883,19 @@ length.xgb.Booster <- function(x) {
  return(xgb.get.num.boosted.rounds(x))
}

#' @title Slice Booster by Rounds
#' @description Creates a new booster including only a selected range of rounds / iterations
#' Slice Booster by Rounds
#'
#' Creates a new booster including only a selected range of rounds / iterations
#' from an existing booster, as given by the sequence `seq(start, end, step)`.
#' @details Note that any R attributes that the booster might have, will not be copied into
#'
#' @details
#' Note that any R attributes that the booster might have will not be copied into
#' the resulting object.
#'
#' @param model,x A fitted `xgb.Booster` object, which is to be sliced by taking only a subset
#' of its rounds / iterations.
#' @param start Start of the slice (base-1 and inclusive, like R's \link{seq}).
#' @param end End of the slice (base-1 and inclusive, like R's \link{seq}).
#'
#' @param start Start of the slice (base-1 and inclusive, like R's [seq()]).
#' @param end End of the slice (base-1 and inclusive, like R's [seq()]).
#' Passing a value of zero here is equivalent to passing the full number of rounds in the
#' booster object.
#' @param step Step size of the slice. Passing '1' will take every round in the sequence defined by
@ -894,8 +903,10 @@ length.xgb.Booster <- function(x) {
#' @return A sliced booster object containing only the requested rounds.
#' @examples
#' data(mtcars)
#'
#' y <- mtcars$mpg
#' x <- as.matrix(mtcars[, -1])
#'
#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
#' model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
#' model_slice <- xgb.slice.Booster(model, 1, 3)
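(A hedged aside: since the `[` method shown further down in this hunk delegates to `xgb.slice.Booster()`, the same slice could presumably also be written with the subsetting operator:)
#' model_slice2 <- model[1:3]
#' length(model_slice2)  # 3 rounds, same as model_slice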
@ -948,10 +959,12 @@ xgb.slice.Booster <- function(model, start, end = xgb.get.num.boosted.rounds(mod
  return(xgb.slice.Booster(x, i[1L], i[length(i)], steps[1L]))
}

#' @title Get Features Names from Booster
#' @description Returns the feature / variable / column names from a fitted
#' booster object, which are set automatically during the call to \link{xgb.train}
#' from the DMatrix names, or which can be set manually through \link{setinfo}.
#' Get Features Names from Booster
#'
#' @description
#' Returns the feature / variable / column names from a fitted
#' booster object, which are set automatically during the call to [xgb.train()]
#' from the DMatrix names, or which can be set manually through [setinfo()].
#'
#' If the object doesn't have feature names, will return `NULL`.
#'
@ -1002,23 +1015,25 @@ xgb.best_iteration <- function(bst) {
  return(out)
}

#' @title Extract coefficients from linear booster
#' @description Extracts the coefficients from a 'gblinear' booster object,
#' as produced by \code{xgb.train} when using parameter `booster="gblinear"`.
#' Extract coefficients from linear booster
#'
#' @description
#' Extracts the coefficients from a 'gblinear' booster object,
#' as produced by [xgb.train()] when using parameter `booster="gblinear"`.
#'
#' Note: this function will error out if passing a booster model
#' which is not of "gblinear" type.
#'
#' @param object A fitted booster of 'gblinear' type.
#' @param ... Not used.
#' @return The extracted coefficients:\itemize{
#' \item If there's only one coefficient per column in the data, will be returned as a
#' @return The extracted coefficients:
#' - If there is only one coefficient per column in the data, will be returned as a
#' vector, potentially containing the feature names if available, with the intercept
#' as first column.
#' \item If there's more than one coefficient per column in the data (e.g. when using
#' - If there is more than one coefficient per column in the data (e.g. when using
#' `objective="multi:softmax"`), will be returned as a matrix with dimensions equal
#' to `[num_features, num_cols]`, with the intercepts as first row. Note that the column
#' (classes in multi-class classification) dimension will not be named.
#' }
#'
#' The intercept returned here will include the 'base_score' parameter (unlike the 'bias'
#' or the last coefficient in the model dump, which doesn't have 'base_score' added to it),
@ -1027,12 +1042,15 @@ xgb.best_iteration <- function(bst) {
#'
#' Be aware that the coefficients are obtained by first converting them to strings and
#' back, so there will always be some very small loss of precision compared to the actual
#' coefficients as used by \link{predict.xgb.Booster}.
#' coefficients as used by [predict.xgb.Booster].
#' @examples
#' library(xgboost)
#'
#' data(mtcars)
#'
#' y <- mtcars[, 1]
#' x <- as.matrix(mtcars[, -1])
#'
#' dm <- xgb.DMatrix(data = x, label = y, nthread = 1)
#' params <- list(booster = "gblinear", nthread = 1)
#' model <- xgb.train(data = dm, params = params, nrounds = 2)
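(The example is cut off by the hunk here; a natural hedged continuation would simply call the documented method:)
#' coef(model)  # intercept followed by one coefficient per feature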
@ -1088,19 +1106,25 @@ coef.xgb.Booster <- function(object, ...) {
  return(out)
}

#' @title Deep-copies a Booster Object
#' @description Creates a deep copy of an 'xgb.Booster' object, such that the
#' Deep-copies a Booster Object
#'
#' Creates a deep copy of an 'xgb.Booster' object, such that the
#' C object pointer contained will be a different object, and hence functions
#' like \link{xgb.attr} will not affect the object from which it was copied.
#' like [xgb.attr()] will not affect the object from which it was copied.
#'
#' @param model An 'xgb.Booster' object.
#' @return A deep copy of `model` - it will be identical in every way, but C-level
#' functions called on that copy will not affect the `model` variable.
#' @examples
#' library(xgboost)
#'
#' data(mtcars)
#'
#' y <- mtcars$mpg
#' x <- mtcars[, -1]
#'
#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
#'
#' model <- xgb.train(
#'   data = dm,
#'   params = list(nthread = 1),
@ -1135,29 +1159,35 @@ xgb.copy.Booster <- function(model) {
  return(.Call(XGDuplicate_R, model))
}

#' @title Check if two boosters share the same C object
#' @description Checks whether two booster objects refer to the same underlying C object.
#' @details As booster objects (as returned by e.g. \link{xgb.train}) contain an R 'externalptr'
#' Check if two boosters share the same C object
#'
#' Checks whether two booster objects refer to the same underlying C object.
#'
#' @details
#' As booster objects (as returned by e.g. [xgb.train()]) contain an R 'externalptr'
#' object, they don't follow typical copy-on-write semantics of other R objects - that is, if
#' one assigns a booster to a different variable and modifies that new variable through in-place
#' methods like \link{xgb.attr<-}, the modification will be applied to both the old and the new
#' methods like [xgb.attr<-()], the modification will be applied to both the old and the new
#' variable, unlike typical R assignments which would only modify the latter.
#'
#' This function allows checking whether two booster objects share the same 'externalptr',
#' regardless of the R attributes that they might have.
#'
#' In order to duplicate a booster in such a way that the copy wouldn't share the same
#' 'externalptr', one can use function \link{xgb.copy.Booster}.
#' 'externalptr', one can use function [xgb.copy.Booster()].
#' @param obj1 Booster model to compare with `obj2`.
#' @param obj2 Booster model to compare with `obj1`.
#' @return Either `TRUE` or `FALSE` according to whether the two boosters share
#' the underlying C object.
#' @seealso \link{xgb.copy.Booster}
#' @return Either `TRUE` or `FALSE` according to whether the two boosters share the
#' underlying C object.
#' @seealso [xgb.copy.Booster()]
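A compact hedged sketch of the pointer-sharing semantics from the details section (only functions named in this commit are used):

library(xgboost)
data(mtcars)
model <- xgb.train(
  data = xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1),
  params = list(nthread = 1),
  nrounds = 2
)
alias <- model                     # plain assignment: shares the 'externalptr'
xgb.is.same.Booster(model, alias)  # TRUE
deep <- xgb.copy.Booster(model)
xgb.is.same.Booster(model, deep)   # FALSE - a genuinely separate C object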
#' @examples
#' library(xgboost)
#'
#' data(mtcars)
#'
#' y <- mtcars$mpg
#' x <- as.matrix(mtcars[, -1])
#'
#' model <- xgb.train(
#'   params = list(nthread = 1),
#'   data = xgb.DMatrix(x, label = y, nthread = 1),
@ -1210,10 +1240,10 @@ xgb.is.same.Booster <- function(obj1, obj2) {
#' attr(bst, "myattr") <- "memo"
#'
#' print(bst)
#'
#' @method print xgb.Booster
#' @export
print.xgb.Booster <- function(x, ...) {
  # this lets it error out when the object comes from an earlier R xgboost version
  # this lets it error out when the object comes from an earlier R XGBoost version
  handle <- xgb.get.handle(x)
  cat('##### xgb.Booster\n')

@ -1,20 +1,15 @@
#' Create new features from a previously learned model
#'
#' May improve the learning by adding new features to the training data based on the decision trees from a previously learned model.
#'
#' @param model decision tree boosting model learned on the original data
#' @param data original data (usually provided as a \code{dgCMatrix} matrix)
#' @param ... currently not used
#'
#' @return \code{dgCMatrix} matrix including both the original data and the new features.
#' May improve the learning by adding new features to the training data based on the
#' decision trees from a previously learned model.
#'
#' @details
#' This is the function inspired from the paragraph 3.1 of the paper:
#'
#' \strong{Practical Lessons from Predicting Clicks on Ads at Facebook}
#' **Practical Lessons from Predicting Clicks on Ads at Facebook**
#'
#' \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
#' Joaquin Quinonero Candela)}
#' *(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yanxin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
#' Joaquin Quinonero Candela)*
#'
#' International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
#'
@ -33,11 +28,11 @@
#' where the first subtree has 3 leafs and the second 2 leafs. If an
#' instance ends up in leaf 2 in the first subtree and leaf 1 in
#' second subtree, the overall input to the linear classifier will
#' be the binary vector \code{[0, 1, 0, 1, 0]}, where the first 3 entries
#' be the binary vector `[0, 1, 0, 1, 0]`, where the first 3 entries
#' correspond to the leaves of the first subtree and last 2 to
#' those of the second subtree.
#'
#' [...]
#' ...
#'
#' We can understand boosted decision tree
#' based transformation as a supervised feature encoding that
@ -45,16 +40,23 @@
#' vector. A traversal from root node to a leaf node represents
#' a rule on certain features."
#'
#' @param model Decision tree boosting model learned on the original data.
#' @param data Original data (usually provided as a `dgCMatrix` matrix).
#' @param ... Currently not used.
#'
#' @return A `dgCMatrix` matrix including both the original data and the new features.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' data(agaricus.train, package = "xgboost")
#' data(agaricus.test, package = "xgboost")
#'
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))
#'
#' param <- list(max_depth=2, eta=1, objective='binary:logistic')
#' param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
#' nrounds = 4
#'
#' bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)
#' bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)
#'
#' # Model accuracy without new features
#' accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) /

@ -1,36 +1,44 @@
#' Dump an xgboost model in text format.
#' Dump an XGBoost model in text format.
#'
#' Dump an xgboost model in text format.
#' Dump an XGBoost model in text format.
#'
#' @param model the model object.
#' @param fname the name of the text file where to save the model text dump.
#' If not provided or set to \code{NULL}, the model is returned as a \code{character} vector.
#' @param fmap feature map file representing feature types.
#' See demo/ for walkthrough example in R, and
#' \url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt}
#' for example Format.
#' @param with_stats whether to dump some additional statistics about the splits.
#' @param model The model object.
#' @param fname The name of the text file where to save the model text dump.
#' If not provided or set to `NULL`, the model is returned as a character vector.
#' @param fmap Feature map file representing feature types. See demo/ for a walkthrough
#' example in R, and \url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt}
#' to see an example of the value.
#' @param with_stats Whether to dump some additional statistics about the splits.
#' When this option is on, the model dump contains two additional values:
#' gain is the approximate loss function gain we get in each split;
#' cover is the sum of second order gradient in each node.
#' @param dump_format either 'text', 'json', or 'dot' (graphviz) format could be specified.
#' @param dump_format Either 'text', 'json', or 'dot' (graphviz) format can be specified.
#'
#' Format 'dot' for a single tree can be passed directly to packages that consume this format
#' for graph visualization, such as function [DiagrammeR::grViz()]
#' @param ... currently not used
#' for graph visualization, such as function `DiagrammeR::grViz()`
#' @param ... Currently not used
#'
#' @return
#' If fname is not provided or set to \code{NULL} the function will return the model
#' as a \code{character} vector. Otherwise it will return \code{TRUE}.
#' If fname is not provided or set to `NULL` the function will return the model
#' as a character vector. Otherwise it will return `TRUE`.
#'
#' @examples
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' data(agaricus.train, package = "xgboost")
#' data(agaricus.test, package = "xgboost")
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
#'                  eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#'
#' bst <- xgb.train(
#'   data = xgb.DMatrix(train$data, label = train$label),
#'   max_depth = 2,
#'   eta = 1,
#'   nthread = 2,
#'   nrounds = 2,
#'   objective = "binary:logistic"
#' )
#'
#' # save the model in file 'xgb.model.dump'
#' dump_path = file.path(tempdir(), 'model.dump')
#' xgb.dump(bst, dump_path, with_stats = TRUE)
@ -39,7 +47,7 @@
#' print(xgb.dump(bst, with_stats = TRUE))
#'
#' # print in JSON format:
#' cat(xgb.dump(bst, with_stats = TRUE, dump_format='json'))
#' cat(xgb.dump(bst, with_stats = TRUE, dump_format = "json"))
#'
#' # plot first tree leveraging the 'dot' format
#' if (requireNamespace('DiagrammeR', quietly = TRUE)) {

@ -1,6 +1,5 @@
# ggplot backend for the xgboost plotting facilities


#' @rdname xgb.plot.importance
#' @export
xgb.ggplot.importance <- function(importance_matrix = NULL, top_n = NULL, measure = NULL,
@ -135,8 +134,7 @@ xgb.ggplot.shap.summary <- function(data, shap_contrib = NULL, features = NULL,
#' @param data_list The result of `xgb.shap.data()`.
#' @param normalize Whether to standardize feature values to mean 0 and
#' standard deviation 1. This is useful for comparing multiple features on the same
#' plot. Default is \code{FALSE}.
#'
#' plot. Default is `FALSE`.
#' @return A `data.table` containing the observation ID, the feature name, the
#' feature value (normalized if specified), and the SHAP contribution value.
#' @noRd
@ -167,7 +165,6 @@ prepare.ggplot.shap.data <- function(data_list, normalize = FALSE) {
#' Useful to compare multiple features on the same plot.
#'
#' @param x Numeric vector.
#'
#' @return Numeric vector with mean 0 and standard deviation 1.
#' @noRd
#' @keywords internal

@ -2,6 +2,13 @@
#'
#' Creates a `data.table` of feature importances.
#'
#' @details
#' This function works for both linear and tree models.
#'
#' For linear models, the importance is the absolute magnitude of linear coefficients.
#' To obtain a meaningful ranking by importance for linear models, the features need to
#' be on the same scale (which is also recommended when using L1 or L2 regularization).
#'
#' @param feature_names Character vector used to overwrite the feature names
#' of the model. The default is `NULL` (use original feature names).
#' @param model Object of class `xgb.Booster`.
@ -14,15 +21,6 @@
#' @param data Deprecated.
#' @param label Deprecated.
#' @param target Deprecated.
#'
#' @details
#'
#' This function works for both linear and tree models.
#'
#' For linear models, the importance is the absolute magnitude of linear coefficients.
#' To obtain a meaningful ranking by importance for linear models, the features need to
#' be on the same scale (which is also recommended when using L1 or L2 regularization).
#'
#' @return A `data.table` with the following columns:
#'
#' For a tree model:

@ -1,28 +1,27 @@
#' Load xgboost model from binary file
#' Load XGBoost model from binary file
#'
#' Load xgboost model from the binary model file.
#' Load XGBoost model from binary model file.
#'
#' @param modelfile the name of the binary input file.
#' @param modelfile The name of the binary input file.
#'
#' @details
#' The input file is expected to contain a model saved in an xgboost model format
#' using either \code{\link{xgb.save}} or \code{\link{xgb.cb.save.model}} in R, or using some
#' appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
#' saved from there in xgboost format, could be loaded from R.
#' The input file is expected to contain a model saved in an XGBoost model format
#' using either [xgb.save()] in R, or using some
#' appropriate methods from other XGBoost interfaces. E.g., a model trained in Python and
#' saved from there in XGBoost format, could be loaded from R.
#'
#' Note: a model saved as an R-object, has to be loaded using corresponding R-methods,
#' not \code{xgb.load}.
#' Note: a model saved as an R object has to be loaded using corresponding R methods,
#' not by [xgb.load()].
#'
#' @return
#' An object of \code{xgb.Booster} class.
#' An object of `xgb.Booster` class.
#'
#' @seealso
#' \code{\link{xgb.save}}
#' @seealso [xgb.save()]
#'
#' @examples
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' data(agaricus.train, package = "xgboost")
#' data(agaricus.test, package = "xgboost")
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
@ -30,6 +29,7 @@
#'
#' train <- agaricus.train
#' test <- agaricus.test
#'
#' bst <- xgb.train(
#'   data = xgb.DMatrix(train$data, label = train$label),
#'   max_depth = 2,

@ -1,8 +1,8 @@
#' Load serialised xgboost model from R's raw vector
#' Load serialised XGBoost model from R's raw vector
#'
#' User can generate raw memory buffer by calling xgb.save.raw
#' User can generate raw memory buffer by calling [xgb.save.raw()].
#'
#' @param buffer the buffer returned by xgb.save.raw
#' @param buffer The buffer returned by [xgb.save.raw()].
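Since this documentation block carries no example, a minimal hedged roundtrip sketch (using only functions named in this commit):

library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  params = list(objective = "binary:logistic", nthread = 1),
  nrounds = 2
)
raw_bytes <- xgb.save.raw(bst)   # serialize the booster to a raw vector
bst2 <- xgb.load.raw(raw_bytes)  # reconstruct an equivalent booster from it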
#' @export
xgb.load.raw <- function(buffer) {
  cachelist <- list()

@ -2,13 +2,12 @@
#'
#' Parse a boosted tree model text dump into a `data.table` structure.
#'
#' @param model Object of class `xgb.Booster`. If it contains feature names (they can be set through
#' \link{setinfo}), they will be used in the output from this function.
#' @param model Object of class `xgb.Booster`. If it contains feature names (they can
#' be set through [setinfo()]), they will be used in the output from this function.
#' @param text Character vector previously generated by the function [xgb.dump()]
#' (called with parameter `with_stats = TRUE`). `text` takes precedence over `model`.
#' @param trees An integer vector of tree indices that should be used.
#' The default (`NULL`) uses all trees.
#' Useful, e.g., in multiclass classification to get only
#' @param trees An integer vector of tree indices that should be used. The default
#' (`NULL`) uses all trees. Useful, e.g., in multiclass classification to get only
#' the trees of one class. *Important*: the tree index in XGBoost models
#' is zero-based (e.g., use `trees = 0:4` for the first five trees).
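A hedged sketch of the zero-based `trees` argument (not part of the hunk):

library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  params = list(objective = "binary:logistic", nthread = 1),
  nrounds = 5
)
dt <- xgb.model.dt.tree(bst, trees = 0:1)  # rows for the first two trees only
unique(dt$Tree)                            # 0 1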
#' @param use_int_id A logical flag indicating whether nodes in columns "Yes", "No", and
@ -195,7 +194,7 @@ xgb.model.dt.tree <- function(model = NULL, text = NULL,
  td[order(Tree, Node)]
}

# Avoid error messages during CRAN check.
# Avoid notes during CRAN check.
# The reason is that these variables are never declared
# They are mainly column names inferred by Data.table...
globalVariables(c("Tree", "Node", "ID", "Feature", "t", "isLeaf", ".SD", ".SDcols"))

@ -4,7 +4,8 @@
#' - `xgb.plot.deepness()` uses base R graphics, while
#' - `xgb.ggplot.deepness()` uses "ggplot2".
#'
#' @param model Either an `xgb.Booster` model, or the "data.table" returned by [xgb.model.dt.tree()].
#' @param model Either an `xgb.Booster` model, or the "data.table" returned
#' by [xgb.model.dt.tree()].
#' @param which Which distribution to plot (see details).
#' @param plot Should the plot be shown? Default is `TRUE`.
#' @param ... Other parameters passed to [graphics::barplot()] or [graphics::plot()].

@ -4,6 +4,21 @@
#' - `xgb.plot.importance()` uses base R graphics, while
#' - `xgb.ggplot.importance()` uses "ggplot".
#'
#' @details
#' The graph represents each feature as a horizontal bar of length proportional to the
#' importance of a feature. Features are sorted by decreasing importance.
#' It works for both "gblinear" and "gbtree" models.
#'
#' When `rel_to_first = FALSE`, the values would be plotted as in `importance_matrix`.
#' For a "gbtree" model, that would mean being normalized to the total of 1
#' ("what is feature's importance contribution relative to the whole model?").
#' For linear models, `rel_to_first = FALSE` would show actual values of the coefficients.
#' Setting `rel_to_first = TRUE` allows to see the picture from the perspective of
#' "what is feature's importance contribution relative to the most important feature?"
#'
#' The "ggplot" backend performs 1-D clustering of the importance values,
#' with bar colors corresponding to different clusters having similar importance values.
#'
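A hedged sketch of `rel_to_first` as described above (only functions named in this commit):

library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  params = list(objective = "binary:logistic", nthread = 1),
  nrounds = 2
)
imp <- xgb.importance(model = bst)
# Bars rescaled so that the most important feature equals 1:
xgb.plot.importance(imp, top_n = 5, rel_to_first = TRUE, xlab = "Relative importance")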
#' @param importance_matrix A `data.table` as returned by [xgb.importance()].
#' @param top_n Maximal number of top features to include into the plot.
#' @param measure The name of importance measure to plot.
@ -19,22 +34,6 @@
#' @param ... Other parameters passed to [graphics::barplot()]
#' (except `horiz`, `border`, `cex.names`, `names.arg`, and `las`).
#' Only used in `xgb.plot.importance()`.
#'
#' @details
#' The graph represents each feature as a horizontal bar of length proportional to the importance of a feature.
#' Features are sorted by decreasing importance.
#' It works for both "gblinear" and "gbtree" models.
#'
#' When `rel_to_first = FALSE`, the values would be plotted as in `importance_matrix`.
#' For a "gbtree" model, that would mean being normalized to the total of 1
#' ("what is feature's importance contribution relative to the whole model?").
#' For linear models, `rel_to_first = FALSE` would show actual values of the coefficients.
#' Setting `rel_to_first = TRUE` allows to see the picture from the perspective of
#' "what is feature's importance contribution relative to the most important feature?"
#'
#' The "ggplot" backend performs 1-D clustering of the importance values,
#' with bar colors corresponding to different clusters having similar importance values.
#'
#' @return
#' The return value depends on the function:
#' - `xgb.plot.importance()`: Invisibly, a "data.table" with `n_top` features sorted

@ -2,12 +2,7 @@
#'
#' Visualization of the ensemble of trees as a single collective unit.
#'
#' @inheritParams xgb.plot.tree
#' @param features_keep Number of features to keep in each position of the multi trees,
#' by default 5.
#'
#' @details
#'
#' This function tries to capture the complexity of a gradient boosted tree model
#' in a cohesive way by compressing an ensemble of trees into a single tree-graph representation.
#' The goal is to improve the interpretability of a model generally seen as black box.
@ -25,6 +20,9 @@
#' This function is inspired by this blog post:
#' <https://wellecks.wordpress.com/2015/02/21/peering-into-the-black-box-visualizing-lambdamart/>
#'
#' @inheritParams xgb.plot.tree
#' @param features_keep Number of features to keep in each position of the multi trees,
#' by default 5.
#' @inherit xgb.plot.tree return
#'
#' @examples

@ -5,11 +5,10 @@
#' @param data The data to explain as a `matrix` or `dgCMatrix`.
#' @param shap_contrib Matrix of SHAP contributions of `data`.
#' The default (`NULL`) computes it from `model` and `data`.
#' @param features Vector of column indices or feature names to plot.
#' When `NULL` (default), the `top_n` most important features are selected
#' by [xgb.importance()].
#' @param features Vector of column indices or feature names to plot. When `NULL`
#' (default), the `top_n` most important features are selected by [xgb.importance()].
#' @param top_n How many of the most important features (<= 100) should be selected?
#' By default 1 for SHAP dependence and 10 for SHAP summary).
#' By default 1 for SHAP dependence and 10 for SHAP summary.
#' Only used when `features = NULL`.
#' @param model An `xgb.Booster` model. Only required when `shap_contrib = NULL` or
#' `features = NULL`.
@ -120,6 +119,7 @@
#' )
#' trees0 <- seq(from = 0, by = nclass, length.out = nrounds)
#' col <- rgb(0, 0, 1, 0.5)
#'
#' xgb.plot.shap(
#'   x,
#'   model = mbst,

@ -2,36 +2,7 @@
#'
#' Read a tree model text dump and plot the model.
#'
#' @param model Object of class `xgb.Booster`. If it contains feature names (they can be set through
#' \link{setinfo}), they will be used in the output from this function.
#' @param trees An integer vector of tree indices that should be used.
#' The default (`NULL`) uses all trees.
#' Useful, e.g., in multiclass classification to get only
#' the trees of one class. *Important*: the tree index in XGBoost models
#' is zero-based (e.g., use `trees = 0:2` for the first three trees).
#' @param plot_width,plot_height Width and height of the graph in pixels.
#' The values are passed to [DiagrammeR::render_graph()].
#' @param render Should the graph be rendered or not? The default is `TRUE`.
#' @param show_node_id a logical flag for whether to show node id's in the graph.
#' @param style Style to use for the plot. Options are:\itemize{
#' \item `"xgboost"`: will use the plot style defined in the core XGBoost library,
#' which is shared between different interfaces through the 'dot' format. This
#' style was not available before version 2.1.0 in R. It always plots the trees
#' vertically (from top to bottom).
#' \item `"R"`: will use the style defined from XGBoost's R interface, which predates
#' the introducition of the standardized style from the core library. It might plot
#' the trees horizontally (from left to right).
#' }
#'
#' Note that `style="xgboost"` is only supported when all of the following conditions are met:\itemize{
#' \item Only a single tree is being plotted.
#' \item Node IDs are not added to the graph.
#' \item The graph is being returned as `htmlwidget` (`render=TRUE`).
#' }
#' @param ... currently not used.
#'
#' @details
#'
#' When using `style="xgboost"`, the content of each node is visualized as follows:
#' - For non-terminal nodes, it will display the split condition (number or name if
#' available, and the condition that would decide to which node to go next).
@ -56,6 +27,31 @@
#'
#' This function uses [GraphViz](https://www.graphviz.org/) as DiagrammeR backend.
#'
#' @param model Object of class `xgb.Booster`. If it contains feature names (they can be set through
#' \link{setinfo}), they will be used in the output from this function.
#' @param trees An integer vector of tree indices that should be used.
#' The default (`NULL`) uses all trees.
#' Useful, e.g., in multiclass classification to get only
#' the trees of one class. *Important*: the tree index in XGBoost models
#' is zero-based (e.g., use `trees = 0:2` for the first three trees).
#' @param plot_width,plot_height Width and height of the graph in pixels.
#' The values are passed to `DiagrammeR::render_graph()`.
#' @param render Should the graph be rendered or not? The default is `TRUE`.
#' @param show_node_id A logical flag for whether to show node IDs in the graph.
#' @param style Style to use for the plot:
#' - `"xgboost"`: will use the plot style defined in the core XGBoost library,
#' which is shared between different interfaces through the 'dot' format. This
#' style was not available before version 2.1.0 in R. It always plots the trees
#' vertically (from top to bottom).
#' - `"R"`: will use the style defined from XGBoost's R interface, which predates
#' the introduction of the standardized style from the core library. It might plot
#' the trees horizontally (from left to right).
#'
#' Note that `style="xgboost"` is only supported when all of the following conditions are met:
#' - Only a single tree is being plotted.
#' - Node IDs are not added to the graph.
#' - The graph is being returned as `htmlwidget` (`render=TRUE`).
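A hedged sketch of `style = "xgboost"` under exactly those conditions (single tree, no node IDs, rendered widget); rendering assumes the DiagrammeR package is installed:

library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  params = list(max_depth = 3, objective = "binary:logistic", nthread = 1),
  nrounds = 2
)
if (requireNamespace("DiagrammeR", quietly = TRUE)) {
  xgb.plot.tree(bst, trees = 0, style = "xgboost")  # first tree, plotted top to bottom
}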
#' @param ... Currently not used.
#' @return
#' The value depends on the `render` parameter:
#' - If `render = TRUE` (default): Rendered graph object which is an htmlwidget of
@ -63,7 +59,7 @@
#' running from the command line.
#' - If `render = FALSE`: Graph object which is of DiagrammeR's class `dgr_graph`.
#' This could be useful if one wants to modify some of the graph attributes
#' before rendering the graph with [DiagrammeR::render_graph()].
#' before rendering the graph with `DiagrammeR::render_graph()`.
#'
#' @examples
#' data(agaricus.train, package = "xgboost")

@ -1,43 +1,39 @@
#' Save xgboost model to binary file
#' Save XGBoost model to binary file
#'
#' Save xgboost model to a file in binary or JSON format.
#' Save XGBoost model to a file in binary or JSON format.
#'
#' @param model Model object of \code{xgb.Booster} class.
#' @param fname Name of the file to write.
#'
#' Note that the extension of this file name determined the serialization format to use:\itemize{
#' \item Extension ".ubj" will use the universal binary JSON format (recommended).
#' @param fname Name of the file to write. Its extension determines the serialization format:
#' - ".ubj": Use the universal binary JSON format (recommended).
#' This format uses binary types for e.g. floating point numbers, thereby preventing any loss
#' of precision when converting to a human-readable JSON text or similar.
#' \item Extension ".json" will use plain JSON, which is a human-readable format.
#' \item Extension ".deprecated" will use a \bold{deprecated} binary format. This format will
#' - ".json": Use plain JSON, which is a human-readable format.
#' - ".deprecated": Use **deprecated** binary format. This format will
#' not be able to save attributes introduced after v1 of XGBoost, such as the "best_iteration"
#' attribute that boosters might keep, nor feature names or user-specified attributes.
#' \item If the format is not specified by passing one of the file extensions above, will
#' - If the format is not specified by passing one of the file extensions above, will
#' default to UBJ.
#' }
#'
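A hedged sketch of the extension-based dispatch described above (file names are illustrative):

library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  params = list(objective = "binary:logistic", nthread = 1),
  nrounds = 2
)
xgb.save(bst, file.path(tempdir(), "model.ubj"))   # universal binary JSON
xgb.save(bst, file.path(tempdir(), "model.json"))  # plain, human-readable JSON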
#' @details
#' This methods allows to save a model in an xgboost-internal binary or text format which is universal
#' among the various xgboost interfaces. In R, the saved model file could be read-in later
#' using either the \code{\link{xgb.load}} function or the \code{xgb_model} parameter
#' of \code{\link{xgb.train}}.
#'
#' Note: a model can also be saved as an R-object (e.g., by using \code{\link[base]{readRDS}}
#' or \code{\link[base]{save}}). However, it would then only be compatible with R, and
#' corresponding R-methods would need to be used to load it. Moreover, persisting the model with
#' \code{\link[base]{readRDS}} or \code{\link[base]{save}}) might cause compatibility problems in
#' future versions of XGBoost. Consult \code{\link{a-compatibility-note-for-saveRDS-save}} to learn
#' how to persist models in a future-proof way, i.e. to make the model accessible in future
#' This method allows saving a model in an XGBoost-internal binary or text format which is universal
#' among the various xgboost interfaces. In R, the saved model file could be read later
#' using either the [xgb.load()] function or the `xgb_model` parameter of [xgb.train()].
#'
#' Note: a model can also be saved as an R object (e.g., by using [readRDS()]
#' or [save()]). However, it would then only be compatible with R, and
#' corresponding R methods would need to be used to load it. Moreover, persisting the model with
#' [readRDS()] or [save()] might cause compatibility problems in
#' future versions of XGBoost. Consult [a-compatibility-note-for-saveRDS-save] to learn
#' how to persist models in a future-proof way, i.e., to make the model accessible in future
#' releases of XGBoost.
#'
#' @seealso
#' \code{\link{xgb.load}}
#' @seealso [xgb.load()]
#'
#' @examples
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' data(agaricus.train, package = "xgboost")
#' data(agaricus.test, package = "xgboost")
#'
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
@ -45,6 +41,7 @@
#'
#' train <- agaricus.train
#' test <- agaricus.test
#'
#' bst <- xgb.train(
#'   data = xgb.DMatrix(train$data, label = train$label),
#'   max_depth = 2,
@ -53,6 +50,7 @@
#'   nrounds = 2,
#'   objective = "binary:logistic"
#' )
#'
#' fname <- file.path(tempdir(), "xgb.ubj")
#' xgb.save(bst, fname)
#' bst <- xgb.load(fname)

@ -1,29 +1,34 @@
#' Save xgboost model to R's raw vector,
#' user can call xgb.load.raw to load the model back from raw vector
#' Save XGBoost model to R's raw vector
#'
#' Save xgboost model from xgboost or xgb.train
#' Save XGBoost model from [xgboost()] or [xgb.train()].
#' Call [xgb.load.raw()] to load the model back from raw vector.
#'
#' @param model the model object.
#' @param raw_format The format for encoding the booster. Available options are
#' \itemize{
#' \item \code{json}: Encode the booster into JSON text document.
#' \item \code{ubj}: Encode the booster into Universal Binary JSON.
#' \item \code{deprecated}: Encode the booster into old customized binary format.
#' }
#' @param model The model object.
#' @param raw_format The format for encoding the booster:
#' - "json": Encode the booster into JSON text document.
#' - "ubj": Encode the booster into Universal Binary JSON.
#' - "deprecated": Encode the booster into old customized binary format.
#'
#' @examples
#' \dontshow{RhpcBLASctl::omp_set_num_threads(1)}
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' data(agaricus.train, package = "xgboost")
#' data(agaricus.test, package = "xgboost")
#'
#' ## Keep the number of threads to 2 for examples
#' nthread <- 2
#' ## Keep the number of threads to 1 for examples
#' nthread <- 1
#' data.table::setDTthreads(nthread)
#'
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
#'                  eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")
#'
#' bst <- xgb.train(
#'   data = xgb.DMatrix(train$data, label = train$label),
#'   max_depth = 2,
#'   eta = 1,
#'   nthread = nthread,
#'   nrounds = 2,
#'   objective = "binary:logistic"
#' )
#'
#' raw <- xgb.save.raw(bst)
#' bst <- xgb.load.raw(raw)

@ -944,6 +944,7 @@ xgboost <- function(
  return(model)
}

#' @method print xgboost
#' @export
print.xgboost <- function(x, ...) {
  cat("XGBoost model object\n")

@ -5,66 +5,77 @@
\title{Model Serialization and Compatibility}
\description{
When it comes to serializing XGBoost models, it's possible to use R serializers such as
\link{save} or \link{saveRDS} to serialize an XGBoost R model, but XGBoost also provides
\code{\link[=save]{save()}} or \code{\link[=saveRDS]{saveRDS()}} to serialize an XGBoost R model, but XGBoost also provides
its own serializers with better compatibility guarantees, which allow loading
said models in other language bindings of XGBoost.

Note that an \code{xgb.Booster} object, outside of its core components, might also keep:\itemize{
\item Additional model configuration (accessible through \link{xgb.config}),
which includes model fitting parameters like \code{max_depth} and runtime parameters like \code{nthread}.
Note that an \code{xgb.Booster} object, outside of its core components, might also keep:
\itemize{
\item Additional model configuration (accessible through \code{\link[=xgb.config]{xgb.config()}}), which includes
model fitting parameters like \code{max_depth} and runtime parameters like \code{nthread}.
These are not necessarily useful for prediction/importance/plotting.
\item Additional R-specific attributes - e.g. results of callbacks, such as evaluation logs,
which are kept as a \code{data.table} object, accessible through \code{attributes(model)$evaluation_log}
if present.
\item Additional R specific attributes - e.g. results of callbacks, such as evaluation logs,
which are kept as a \code{data.table} object, accessible through
\code{attributes(model)$evaluation_log} if present.
}

The first one (configurations) does not have the same compatibility guarantees as
the model itself, including attributes that are set and accessed through \link{xgb.attributes} - that is, such configuration
might be lost after loading the booster in a different XGBoost version, regardless of the
serializer that was used. These are saved when using \link{saveRDS}, but will be discarded
if loaded into an incompatible XGBoost version. They are not saved when using XGBoost's
serializers from its public interface including \link{xgb.save} and \link{xgb.save.raw}.
the model itself, including attributes that are set and accessed through
\code{\link[=xgb.attributes]{xgb.attributes()}} - that is, such configuration might be lost after loading the
booster in a different XGBoost version, regardless of the serializer that was used.
These are saved when using \code{\link[=saveRDS]{saveRDS()}}, but will be discarded if loaded into an
incompatible XGBoost version. They are not saved when using XGBoost's
serializers from its public interface including \code{\link[=xgb.save]{xgb.save()}} and \code{\link[=xgb.save.raw]{xgb.save.raw()}}.

The second ones (R attributes) are not part of the standard XGBoost model structure, and thus are
not saved when using XGBoost's own serializers. These attributes are only used for informational
purposes, such as keeping track of evaluation metrics as the model was fit, or saving the R
call that produced the model, but are otherwise not used for prediction / importance / plotting / etc.
The second ones (R attributes) are not part of the standard XGBoost model structure,
and thus are not saved when using XGBoost's own serializers. These attributes are
only used for informational purposes, such as keeping track of evaluation metrics as
the model was fit, or saving the R call that produced the model, but are otherwise
not used for prediction / importance / plotting / etc.
These R attributes are only preserved when using R's serializers.

Note that XGBoost models in R starting from version \verb{2.1.0} and onwards, and XGBoost models
before version \verb{2.1.0}; have a very different R object structure and are incompatible with
each other. Hence, models that were saved with R serializers live \code{saveRDS} or \code{save} before
version \verb{2.1.0} will not work with latter \code{xgboost} versions and vice versa. Be aware that
the structure of R model objects could in theory change again in the future, so XGBoost's serializers
Note that XGBoost models in R starting from version \verb{2.1.0} and onwards, and
XGBoost models before version \verb{2.1.0}, have a very different R object structure and
are incompatible with each other. Hence, models that were saved with R serializers
like \code{\link[=saveRDS]{saveRDS()}} or \code{\link[=save]{save()}} before version \verb{2.1.0} will not work with later
\code{xgboost} versions and vice versa. Be aware that the structure of R model objects
could in theory change again in the future, so XGBoost's serializers
should be preferred for long-term storage.

Furthermore, note that using the package \code{qs} for serialization will require version 0.26 or
higher of said package, and will have the same compatibility restrictions as R serializers.
Furthermore, note that using the package \code{qs} for serialization will require
version 0.26 or higher of said package, and will have the same compatibility
restrictions as R serializers.
}
\details{
Use \code{\link{xgb.save}} to save the XGBoost model as a stand-alone file. You may opt into
Use \code{\link[=xgb.save]{xgb.save()}} to save the XGBoost model as a stand-alone file. You may opt into
the JSON format by specifying the JSON extension. To read the model back, use
\code{\link{xgb.load}}.
\code{\link[=xgb.load]{xgb.load()}}.

Use \code{\link{xgb.save.raw}} to save the XGBoost model as a sequence (vector) of raw bytes
Use \code{\link[=xgb.save.raw]{xgb.save.raw()}} to save the XGBoost model as a sequence (vector) of raw bytes
in a future-proof manner. Future releases of XGBoost will be able to read the raw bytes and
re-construct the corresponding model. To read the model back, use \code{\link{xgb.load.raw}}.
The \code{\link{xgb.save.raw}} function is useful if you'd like to persist the XGBoost model
re-construct the corresponding model. To read the model back, use \code{\link[=xgb.load.raw]{xgb.load.raw()}}.
The \code{\link[=xgb.save.raw]{xgb.save.raw()}} function is useful if you would like to persist the XGBoost model
as part of another R object.

Use \link{saveRDS} if you require the R-specific attributes that a booster might have, such
Use \code{\link[=saveRDS]{saveRDS()}} if you require the R-specific attributes that a booster might have, such
as evaluation logs, but note that future compatibility of such objects is outside XGBoost's
control as it relies on R's serialization format (see e.g. the details section in
\link{serialize} and \link{save} from base R).
\link{serialize} and \code{\link[=save]{save()}} from base R).

For more details and explanation about model persistence and archival, consult the page
\url{https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html}.
}
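A hedged sketch of the "persist as part of another R object" pattern from the details (the file name and list fields are illustrative):

library(xgboost)
data(agaricus.train, package = "xgboost")
bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  params = list(objective = "binary:logistic", nthread = 1),
  nrounds = 2
)
bundle <- list(model_raw = xgb.save.raw(bst), note = "fit on agaricus")
f <- file.path(tempdir(), "bundle.rds")
saveRDS(bundle, f)                          # R serializer for the wrapper object
bst2 <- xgb.load.raw(readRDS(f)$model_raw)  # future-proof bytes for the model itself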
\examples{
data(agaricus.train, package='xgboost')
bst <- xgb.train(data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
                 max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
                 objective = "binary:logistic")
data(agaricus.train, package = "xgboost")

bst <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label),
  max_depth = 2,
  eta = 1,
  nthread = 2,
  nrounds = 2,
  objective = "binary:logistic"
)

# Save as a stand-alone file; load it with xgb.load()
fname <- file.path(tempdir(), "xgb_model.ubj")

@ -12,11 +12,12 @@
\item{...}{Not used.}
}
\value{
The extracted coefficients:\itemize{
\item If there's only one coefficient per column in the data, will be returned as a
The extracted coefficients:
\itemize{
\item If there is only one coefficient per column in the data, will be returned as a
vector, potentially containing the feature names if available, with the intercept
as first column.
\item If there's more than one coefficient per column in the data (e.g. when using
\item If there is more than one coefficient per column in the data (e.g. when using
\code{objective="multi:softmax"}), will be returned as a matrix with dimensions equal
to \verb{[num_features, num_cols]}, with the intercepts as first row. Note that the column
(classes in multi-class classification) dimension will not be named.
@ -33,16 +34,19 @@ coefficients as used by \link{predict.xgb.Booster}.
|
||||
}
|
||||
\description{
|
||||
Extracts the coefficients from a 'gblinear' booster object,
|
||||
as produced by \code{xgb.train} when using parameter \code{booster="gblinear"}.
|
||||
as produced by \code{\link[=xgb.train]{xgb.train()}} when using parameter \code{booster="gblinear"}.
|
||||
|
||||
Note: this function will error out if passing a booster model
|
||||
which is not of "gblinear" type.
|
||||
}
|
||||
\examples{
|
||||
library(xgboost)
|
||||
|
||||
data(mtcars)
|
||||
|
||||
y <- mtcars[, 1]
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
|
||||
dm <- xgb.DMatrix(data = x, label = y, nthread = 1)
|
||||
params <- list(booster = "gblinear", nthread = 1)
|
||||
model <- xgb.train(data = dm, params = params, nrounds = 2)
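The diff cuts this example off here; a short hedged continuation showing how the extracted coefficients line up with the value section above:

coefs <- coef(model)
length(coefs)  # one intercept plus one coefficient per column of 'x'
head(coefs)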
@ -28,35 +28,36 @@
\item{newdata}{Takes \code{data.frame}, \code{matrix}, \code{dgCMatrix}, \code{dgRMatrix}, \code{dsparseVector},
local data file, or \code{xgb.DMatrix}.

\if{html}{\out{<div class="sourceCode">}}\preformatted{ For single-row predictions on sparse data, it's recommended to use CSR format. If passing
a sparse vector, it will take it as a row vector.
For single-row predictions on sparse data, it is recommended to use CSR format. If passing
a sparse vector, it will take it as a row vector.

Note that, for repeated predictions on the same data, one might want to create a DMatrix to
pass here instead of passing R types like matrices or data frames, as predictions will be
faster on DMatrix.
Note that, for repeated predictions on the same data, one might want to create a DMatrix to
pass here instead of passing R types like matrices or data frames, as predictions will be
faster on DMatrix.

If `newdata` is a `data.frame`, be aware that:\\itemize\{
\\item Columns will be converted to numeric if they aren't already, which could potentially make
the operation slower than in an equivalent `matrix` object.
\\item The order of the columns must match with that of the data from which the model was fitted
(i.e. columns will not be referenced by their names, just by their order in the data).
\\item If the model was fitted to data with categorical columns, these columns must be of
`factor` type here, and must use the same encoding (i.e. have the same levels).
\\item If `newdata` contains any `factor` columns, they will be converted to base-0
encoding (same as during DMatrix creation) - hence, one should not pass a `factor`
under a column which during training had a different type.
\}
}\if{html}{\out{</div>}}}
If \code{newdata} is a \code{data.frame}, be aware that:
\itemize{
\item Columns will be converted to numeric if they aren't already, which could potentially make
the operation slower than in an equivalent \code{matrix} object.
\item The order of the columns must match with that of the data from which the model was fitted
(i.e. columns will not be referenced by their names, just by their order in the data).
\item If the model was fitted to data with categorical columns, these columns must be of
\code{factor} type here, and must use the same encoding (i.e. have the same levels).
\item If \code{newdata} contains any \code{factor} columns, they will be converted to base-0
encoding (same as during DMatrix creation) - hence, one should not pass a \code{factor}
under a column which during training had a different type.
}}
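A hedged sketch of the main point above - building the DMatrix once for repeated predictions (the objects here are illustrative, not taken from this page's examples):

library(xgboost)
data(agaricus.train, package = "xgboost")
model <- xgb.train(
  data = xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1),
  params = list(objective = "binary:logistic", nthread = 1),
  nrounds = 2
)

# Faster for repeated predictions: construct the DMatrix once and reuse it
dnew <- xgb.DMatrix(agaricus.train$data, nthread = 1)
p1 <- predict(model, dnew)
p2 <- predict(model, dnew)  # no repeated conversion cost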

\item{missing}{Float value that represents missing values in data (e.g., 0 or some other extreme value).
\item{missing}{Float value that represents missing values in data
(e.g., 0 or some other extreme value).

\if{html}{\out{<div class="sourceCode">}}\preformatted{ This parameter is not used when `newdata` is an `xgb.DMatrix` - in such cases, should pass
this as an argument to the DMatrix constructor instead.
}\if{html}{\out{</div>}}}
This parameter is not used when \code{newdata} is an \code{xgb.DMatrix} - in such cases,
should pass this as an argument to the DMatrix constructor instead.}

\item{outputmargin}{Whether the prediction should be returned in the form of original untransformed
sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
logistic regression would return log-odds instead of probabilities.}
\item{outputmargin}{Whether the prediction should be returned in the form of
original untransformed sum of predictions from boosting iterations' results.
E.g., setting \code{outputmargin = TRUE} for logistic regression would return log-odds
instead of probabilities.}

\item{predleaf}{Whether to predict per-tree leaf indices.}

@ -73,60 +74,55 @@ training predicting will perform dropout.}
a two-dimensional vector with the start and end numbers in the sequence (same format as R's \code{seq} - i.e.
base-1 indexing, and inclusive of both ends).

\if{html}{\out{<div class="sourceCode">}}\preformatted{ For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will
predict using only the first one.
For example, passing \code{c(1,20)} will predict using the first twenty iterations, while passing \code{c(1,1)} will
predict using only the first one.

If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all
of the iterations (rounds) otherwise.
If passing \code{NULL}, will either stop at the best iteration if the model used early stopping, or use all
of the iterations (rounds) otherwise.

If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
}\if{html}{\out{</div>}}}
If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.}

\item{strict_shape}{Whether to always return an array with the same dimensions for the given prediction mode
regardless of the model type - meaning that, for example, both a multi-class and a binary classification
model would generate output arrays with the same number of dimensions, with the 'class' dimension having
size equal to '1' for the binary model.

\if{html}{\out{<div class="sourceCode">}}\preformatted{ If passing `FALSE` (the default), dimensions will be simplified according to the model type, so that a
binary classification model for example would not have a redundant dimension for 'class'.
If passing \code{FALSE} (the default), dimensions will be simplified according to the model type, so that a
binary classification model for example would not have a redundant dimension for 'class'.

See documentation for the return type for the exact shape of the output arrays for each prediction mode.
}\if{html}{\out{</div>}}}
See documentation for the return type for the exact shape of the output arrays for each prediction mode.}

\item{avoid_transpose}{Whether to output the resulting predictions in the same memory layout in which they
are generated by the core XGBoost library, without transposing them to match the expected output shape.

\if{html}{\out{<div class="sourceCode">}}\preformatted{ Internally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major
order, hence the result needs to be transposed in order to have the expected shape when represented as
an R array or matrix, which might be a slow operation.
Internally, XGBoost uses row-major order for the predictions it generates, while R arrays use column-major
order, hence the result needs to be transposed in order to have the expected shape when represented as
an R array or matrix, which might be a slow operation.

If passing `TRUE`, then the result will have dimensions in reverse order - for example, rows
will be the last dimensions instead of the first dimension.
}\if{html}{\out{</div>}}}
If passing \code{TRUE}, then the result will have dimensions in reverse order - for example, rows
will be the last dimension instead of the first.}
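A small hedged sketch contrasting the two layouts (again reusing 'model' and 'dnew' from the earlier sketch; the exact dimensions depend on the prediction mode):

p_shaped <- predict(model, dnew, strict_shape = TRUE)
dim(p_shaped)  # fixed set of dimensions for this prediction mode
p_raw <- predict(model, dnew, strict_shape = TRUE, avoid_transpose = TRUE)
dim(p_raw)     # same dimensions in reverse order, skipping the transpose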

\item{validate_features}{When \code{TRUE}, validate that the Booster's and newdata's feature_names
match (only applicable when both \code{object} and \code{newdata} have feature names).
\item{validate_features}{When \code{TRUE}, validate that the Booster's and newdata's
feature_names match (only applicable when both \code{object} and \code{newdata} have feature names).

\if{html}{\out{<div class="sourceCode">}}\preformatted{ If the column names differ and `newdata` is not an `xgb.DMatrix`, will try to reorder
the columns in `newdata` to match with the booster's.
If the column names differ and \code{newdata} is not an \code{xgb.DMatrix}, will try to reorder
the columns in \code{newdata} to match with the booster's.

If the booster has feature types and `newdata` is either an `xgb.DMatrix` or `data.frame`,
will additionally verify that categorical columns are of the correct type in `newdata`,
throwing an error if they do not match.
If the booster has feature types and \code{newdata} is either an \code{xgb.DMatrix} or
\code{data.frame}, will additionally verify that categorical columns are of the
correct type in \code{newdata}, throwing an error if they do not match.

If passing `FALSE`, it is assumed that the feature names and types are the same,
and come in the same order as in the training data.
If passing \code{FALSE}, it is assumed that the feature names and types are the same,
and come in the same order as in the training data.

Note that this check might add some sizable latency to the predictions, so it's
recommended to disable it for performance-sensitive applications.
}\if{html}{\out{</div>}}}
Note that this check might add some sizable latency to the predictions, so it's
recommended to disable it for performance-sensitive applications.}

\item{base_margin}{Base margin used for boosting from existing model.

\if{html}{\out{<div class="sourceCode">}}\preformatted{ Note that, if `newdata` is an `xgb.DMatrix` object, this argument will
be ignored as it needs to be added to the DMatrix instead (e.g. by passing it as
an argument in its constructor, or by calling \link{setinfo.xgb.DMatrix}).
}\if{html}{\out{</div>}}}
Note that, if \code{newdata} is an \code{xgb.DMatrix} object, this argument will
be ignored as it needs to be added to the DMatrix instead (e.g. by passing it as
an argument in its constructor, or by calling \code{\link[=setinfo.xgb.DMatrix]{setinfo.xgb.DMatrix()}}).}

\item{...}{Not used.}
}
@ -173,7 +169,7 @@ example, for \code{predinteraction}, they will be \verb{[nfeats+1, nfeats+1, ngr
instead of \verb{[nrows, ngroups, nfeats+1, nfeats+1]}.
}
\description{
Predict values on data based on xgboost model.
Predict values on data based on XGBoost model.
}
\details{
Note that \code{iterationrange} would currently do nothing for predictions from "gblinear",

@ -33,5 +33,4 @@ bst <- xgb.train(
attr(bst, "myattr") <- "memo"

print(bst)

}

@ -13,8 +13,8 @@
}
\description{
Returns the feature / variable / column names from a fitted
booster object, which are set automatically during the call to \link{xgb.train}
from the DMatrix names, or which can be set manually through \link{setinfo}.
booster object, which are set automatically during the call to \code{\link[=xgb.train]{xgb.train()}}
from the DMatrix names, or which can be set manually through \code{\link[=setinfo]{setinfo()}}.

If the object doesn't have feature names, will return \code{NULL}.

@ -53,12 +53,12 @@ Return values of \code{NULL} will be interpreted as \code{FALSE}.}
\item{f_after_training}{A function that will be executed after training is finished.

This function can optionally output something non-NULL, which will become part of the R
attributes of the booster (assuming one passes \code{keep_extra_attributes=TRUE} to \link{xgb.train})
under the name supplied for parameter \code{cb_name} imn the case of \link{xgb.train}; or a part
of the named elements in the result of \link{xgb.cv}.}
attributes of the booster (assuming one passes \code{keep_extra_attributes=TRUE} to \code{\link[=xgb.train]{xgb.train()}})
under the name supplied for parameter \code{cb_name} in the case of \code{\link[=xgb.train]{xgb.train()}}; or a part
of the named elements in the result of \code{\link[=xgb.cv]{xgb.cv()}}.}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}.
}
\description{
Constructor for defining the structure of callback functions that can be executed
@ -66,8 +66,8 @@ at different stages of model training (before / after training, before / after e
iteration).
}
\details{
Arguments that will be passed to the supplied functions are as follows:\itemize{

Arguments that will be passed to the supplied functions are as follows:
\itemize{
\item env The same environment that is passed under argument \code{env}.

It may be modified by the functions in order to e.g. keep tracking of what happens
@ -75,11 +75,10 @@ across iterations or similar.

This environment is only used by the functions supplied to the callback, and will
not be kept after the model fitting function terminates (see parameter \code{f_after_training}).
\item model The booster object when using \code{\link[=xgb.train]{xgb.train()}}, or the folds when using \code{\link[=xgb.cv]{xgb.cv()}}.

\item model The booster object when using \link{xgb.train}, or the folds when using
\link{xgb.cv}.

For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
For \code{\link[=xgb.cv]{xgb.cv()}}, folds are a list with a structure as follows:
\itemize{
\item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
\item \code{bst}: The \code{xgb.Booster} object for the fold.
\item \code{evals}: A list containing two DMatrices, with names \code{train} and \code{test}
@ -88,79 +87,71 @@ For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
from which the \code{test} entry in \code{evals} was obtained.
}

This object should \bold{not} be in-place modified in ways that conflict with the
This object should \strong{not} be in-place modified in ways that conflict with the
training (e.g. resetting the parameters for a training update in a way that resets
the number of rounds to zero in order to overwrite rounds).

Note that any R attributes that are assigned to the booster during the callback functions,
will not be kept thereafter as the booster object variable is not re-assigned during
training. It is however possible to set C-level attributes of the booster through
\link{xgb.attr} or \link{xgb.attributes}, which should remain available for the rest
\code{\link[=xgb.attr]{xgb.attr()}} or \code{\link[=xgb.attributes]{xgb.attributes()}}, which should remain available for the rest
of the iterations and after the training is done.

For keeping variables across iterations, it's recommended to use \code{env} instead.
\item data The data to which the model is being fit, as an \code{xgb.DMatrix} object.

Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
Note that, for \code{\link[=xgb.cv]{xgb.cv()}}, this will be the full data, while data for the specific
folds can be found in the \code{model} object.
\item evals The evaluation data, as passed under argument \code{evals} to \code{\link[=xgb.train]{xgb.train()}}.

\item evals The evaluation data, as passed under argument \code{evals} to
\link{xgb.train}.

For \link{xgb.cv}, this will always be \code{NULL}.

\item begin_iteration Index of the first boosting iteration that will be executed
(base-1 indexing).
For \code{\link[=xgb.cv]{xgb.cv()}}, this will always be \code{NULL}.
\item begin_iteration Index of the first boosting iteration that will be executed (base-1 indexing).

This will typically be '1', but when using training continuation, depending on the
parameters for updates, boosting rounds will be continued from where the previous
model ended, in which case this will be larger than 1.

\item end_iteration Index of the last boosting iteration that will be executed
(base-1 indexing, inclusive of this end).

It should match with argument \code{nrounds} passed to \link{xgb.train} or \link{xgb.cv}.
It should match with argument \code{nrounds} passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}.

Note that boosting might be interrupted before reaching this last iteration, for
example by using the early stopping callback \link{xgb.cb.early.stop}.

\item iteration Index of the iteration number that is being executed (first iteration
will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on).

\item iter_feval Evaluation metrics for \code{evals} that were supplied, either
determined by the objective, or by parameter \code{feval}.

For \link{xgb.train}, this will be a named vector with one entry per element in
For \code{\link[=xgb.train]{xgb.train()}}, this will be a named vector with one entry per element in
\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for
example, if \code{evals} contains an entry named "tr" and the metric is "rmse",
this will be a one-element vector with name "tr-rmse".

For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
For \code{\link[=xgb.cv]{xgb.cv()}}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
where the row names will follow the same naming logic as the one-dimensional vector
that is passed in \link{xgb.train}.
that is passed in \code{\link[=xgb.train]{xgb.train()}}.

Note that, internally, the built-in callbacks such as \link{xgb.cb.print.evaluation} summarize
this table by calculating the row-wise means and standard deviations.

\item final_feval The evaluation results after the last boosting round is executed
(same format as \code{iter_feval}, and will be the exact same input as passed under
\code{iter_feval} to the last round that is executed during model fitting).

\item prev_cb_res Result from a previous run of a callback sharing the same name
(as given by parameter \code{cb_name}) when conducting training continuation, if there
was any in the booster R attributes.

Some times, one might want to append the new results to the previous one, and this will
Sometimes, one might want to append the new results to the previous one, and this will
be done automatically by the built-in callbacks such as \link{xgb.cb.evaluation.log},
which will append the new rows to the previous table.

If no such previous callback result is available (which it never will when fitting
a model from start instead of updating an existing model), this will be \code{NULL}.

For \link{xgb.cv}, which doesn't support training continuation, this will always be \code{NULL}.
For \code{\link[=xgb.cv]{xgb.cv()}}, which doesn't support training continuation, this will always be \code{NULL}.
}
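A minimal hedged sketch of a callback built from these pieces - it accumulates iter_feval in env and returns the collected table from f_after_training. The per-function argument subsets shown here are an assumption based on the list above; check the rendered page for the exact signatures:

feval_log_callback <- xgb.Callback(
  cb_name = "feval_log",
  f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
    # 'env' persists across iterations, unlike R attributes set on 'model'
    env$history <- c(env$history, list(iter_feval))
    FALSE  # a TRUE return would be interpreted as a request to stop early
  },
  f_after_training = function(env, model, data, evals, iteration,
                              final_feval, prev_cb_res) {
    do.call(rbind, env$history)  # kept as an R attribute under "feval_log"
  }
)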

The following names (\code{cb_name} values) are reserved for internal callbacks:\itemize{
The following names (\code{cb_name} values) are reserved for internal callbacks:
\itemize{
\item print_evaluation
\item evaluation_log
\item reset_parameters
@ -170,7 +161,8 @@ The following names (\code{cb_name} values) are reserved for internal callbacks:
\item gblinear_history
}

The following names are reserved for other non-callback attributes:\itemize{
The following names are reserved for other non-callback attributes:
\itemize{
\item names
\item class
\item call
@ -221,8 +213,10 @@ ssq_callback <- xgb.Callback(
)

data(mtcars)

y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])

dm <- xgb.DMatrix(x, label = y, nthread = 1)
model <- xgb.train(
  data = dm,
@ -236,7 +230,8 @@ model <- xgb.train(
attributes(model)$ssq
}
\seealso{
Built-in callbacks:\itemize{
Built-in callbacks:
\itemize{
\item \link{xgb.cb.print.evaluation}
\item \link{xgb.cb.evaluation.log}
\item \link{xgb.cb.reset.parameters}

@ -96,8 +96,7 @@ so it doesn't make sense to assign weights to individual data points.}

\item{base_margin}{Base margin used for boosting from existing model.

\if{html}{\out{<div class="sourceCode">}}\preformatted{ In the case of multi-output models, one can also pass multi-dimensional base_margin.
}\if{html}{\out{</div>}}}
In the case of multi-output models, one can also pass multi-dimensional base_margin.}

\item{missing}{A float value that represents missing values in data (not used when creating DMatrix
from text files).
@ -109,9 +108,8 @@ values in data.}
\item{feature_names}{Set names for features. Overrides column names in data
frame and matrix.

\if{html}{\out{<div class="sourceCode">}}\preformatted{ Note: columns are not referenced by name when calling `predict`, so the column order there
must be the same as in the DMatrix construction, regardless of the column names.
}\if{html}{\out{</div>}}}
Note: columns are not referenced by name when calling \code{predict}, so the column order there
must be the same as in the DMatrix construction, regardless of the column names.}

\item{feature_types}{Set types for features.


@ -45,15 +45,13 @@ so it doesn't make sense to assign weights to individual data points.}

\item{base_margin}{Base margin used for boosting from existing model.

\if{html}{\out{<div class="sourceCode">}}\preformatted{ In the case of multi-output models, one can also pass multi-dimensional base_margin.
}\if{html}{\out{</div>}}}
In the case of multi-output models, one can also pass multi-dimensional base_margin.}

\item{feature_names}{Set names for features. Overrides column names in data
frame and matrix.

\if{html}{\out{<div class="sourceCode">}}\preformatted{ Note: columns are not referenced by name when calling `predict`, so the column order there
must be the same as in the DMatrix construction, regardless of the column names.
}\if{html}{\out{</div>}}}
Note: columns are not referenced by name when calling \code{predict}, so the column order there
must be the same as in the DMatrix construction, regardless of the column names.}

\item{feature_types}{Set types for features.


@ -16,7 +16,7 @@ xgb.attributes(object)
xgb.attributes(object) <- value
}
\arguments{
\item{object}{Object of class \code{xgb.Booster}. \bold{Will be modified in-place} when assigning to it.}
\item{object}{Object of class \code{xgb.Booster}. \strong{Will be modified in-place} when assigning to it.}

\item{name}{A non-empty character string specifying which attribute is to be accessed.}

@ -36,28 +36,28 @@ or \code{NULL} if a model has no stored attributes.
}
}
\description{
These methods allow to manipulate the key-value attribute strings of an xgboost model.
These methods allow to manipulate the key-value attribute strings of an XGBoost model.
}
\details{
The primary purpose of xgboost model attributes is to store some meta data about the model.
The primary purpose of XGBoost model attributes is to store some meta data about the model.
Note that they are a separate concept from the object attributes in R.
Specifically, they refer to key-value strings that can be attached to an xgboost model,
Specifically, they refer to key-value strings that can be attached to an XGBoost model,
stored together with the model's binary representation, and accessed later
(from R or any other interface).
In contrast, any R attribute assigned to an R object of \code{xgb.Booster} class
would not be saved by \code{\link[=xgb.save]{xgb.save()}} because an xgboost model is an external memory object
would not be saved by \code{\link[=xgb.save]{xgb.save()}} because an XGBoost model is an external memory object
and its serialization is handled externally.
Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
Also, setting an attribute that has the same name as one of XGBoost's parameters wouldn't
change the value of that parameter for a model.
Use \code{\link[=xgb.parameters<-]{xgb.parameters<-()}} to set or change model parameters.

The \verb{xgb.attributes<-} setter either updates the existing or adds one or several attributes,
The \code{\link[=xgb.attributes<-]{xgb.attributes<-()}} setter either updates the existing or adds one or several attributes,
but it doesn't delete the other existing attributes.

Important: since this modifies the booster's C object, semantics for assignment here
will differ from R's, as any object reference to the same booster will be modified
too, while assignment of R attributes through \verb{attributes(model)$<attr> <- <value>}
will follow the usual copy-on-write R semantics (see \link{xgb.copy.Booster} for an
will follow the usual copy-on-write R semantics (see \code{\link[=xgb.copy.Booster]{xgb.copy.Booster()}} for an
example of these behaviors).
}
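A short hedged sketch of the in-place semantics described above ('model' stands for a hypothetical booster object):

model_ref <- model  # plain assignment: both variables share the same C object
xgb.attributes(model) <- list(my_key = "my value")
xgb.attr(model_ref, "my_key")  # also returns "my value"
attr(model, "my_r_attr") <- "r value"  # regular R attribute: copy-on-write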
\examples{

@ -2,7 +2,7 @@
% Please edit documentation in R/callbacks.R
\name{xgb.cb.cv.predict}
\alias{xgb.cb.cv.predict}
\title{Callback for returning cross-validation based predictions.}
\title{Callback for returning cross-validation based predictions}
\usage{
xgb.cb.cv.predict(save_models = FALSE, outputmargin = FALSE)
}
@ -13,8 +13,8 @@ xgb.cb.cv.predict(save_models = FALSE, outputmargin = FALSE)
parameter to \link{predict.xgb.Booster}).}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.cv},
but \bold{not} to \link{xgb.train}.
An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.cv]{xgb.cv()}},
but \strong{not} to \code{\link[=xgb.train]{xgb.train()}}.
}
\description{
This callback function saves predictions for all of the test folds,
@ -24,7 +24,7 @@ and also allows to save the folds' models.
Predictions are saved inside of the \code{pred} element, which is either a vector or a matrix,
depending on the number of prediction outputs per data row. The order of predictions corresponds
to the order of rows in the original dataset. Note that when a custom \code{folds} list is
provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a
provided in \code{\link[=xgb.cv]{xgb.cv()}}, the predictions would only be returned properly when this list is a
non-overlapping list of k sets of indices, as in a standard k-fold CV. The predictions would not be
meaningful when user-provided folds have overlapping indices as in, e.g., random sampling splits.
When some of the indices in the training dataset are not included into user-provided \code{folds},

@ -23,7 +23,7 @@ stopping. If not set, the last column would be used.
Let's say the test data in \code{evals} was labelled as \code{dtest},
and one wants to use the AUC in test data for early stopping regardless of where
it is in the \code{evals}, then one of the following would need to be set:
\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
\code{metric_name = 'dtest-auc'} or \code{metric_name = 'dtest_auc'}.
All dash '-' characters in metric names are considered equivalent to '_'.}

\item{verbose}{Whether to print the early stopping information.}

@ -33,7 +33,7 @@ in the resulting object. If passing \code{FALSE}, will only keep the boosting ro
up to the detected best iteration, discarding the ones that come after.}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}.
}
\description{
This callback function determines the condition for early stopping.

@ -49,7 +49,7 @@ The same values are also stored as R attributes as a result of the callback, plu
attribute \code{stopped_by_max_rounds} which indicates whether an early stopping by the \code{stopping_rounds}
condition occurred. Note that the \code{best_iteration} that is stored under R attributes will follow
base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
through \link{xgb.attr} or \link{xgb.attributes}.
through \code{\link[=xgb.attr]{xgb.attr()}} or \code{\link[=xgb.attributes]{xgb.attributes()}}.

At least one dataset is required in \code{evals} for early stopping to work.
}
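A hedged usage sketch tying these pieces together ('dtrain'/'dtest' are assumed to be labelled xgb.DMatrix objects; the metric name follows the 'evals name' + '-' + 'metric name' convention described on these pages):

bst <- xgb.train(
  params = list(objective = "binary:logistic", nthread = 1),
  data = dtrain,
  evals = list(dtest = dtest),
  nrounds = 100,
  callbacks = list(
    xgb.cb.early.stop(stopping_rounds = 5, metric_name = "dtest-logloss")
  )
)
attributes(bst)$best_iteration  # base-1; the C-level value is smaller by 1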

@ -7,14 +7,14 @@
xgb.cb.evaluation.log()
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}.
}
\description{
Callback for logging the evaluation history
}
\details{
This callback creates a table with per-iteration evaluation metrics (see parameters
\code{evals} and \code{feval} in \link{xgb.train}).
\code{evals} and \code{feval} in \code{\link[=xgb.train]{xgb.train()}}).

Note: in the column names of the final data.table, the dash '-' character is replaced with
the underscore '_' in order to make the column names more like regular R identifiers.

@ -7,13 +7,13 @@
xgb.cb.gblinear.history(sparse = FALSE)
}
\arguments{
\item{sparse}{when set to \code{FALSE}/\code{TRUE}, a dense/sparse matrix is used to store the result.
\item{sparse}{When set to \code{FALSE}/\code{TRUE}, a dense/sparse matrix is used to store the result.
Sparse format is useful when one expects only a subset of coefficients to be non-zero,
when using the "thrifty" feature selector with fairly small number of top features
selected per iteration.}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}.
}
\description{
Callback for collecting coefficients history of a gblinear booster
@ -37,11 +37,10 @@ will have column names matching with the feature names, otherwise (when there's
one coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'
(so e.g. column 'c1' for class '0' will be named 'c1:0').

With \code{xgb.train}, the output is either a dense or a sparse matrix.
With with \code{xgb.cv}, it is a list (one element per each fold) of such
matrices.
With \code{\link[=xgb.train]{xgb.train()}}, the output is either a dense or a sparse matrix.
With \code{\link[=xgb.cv]{xgb.cv()}}, it is a list (one element per fold) of such matrices.

Function \link{xgb.gblinear.history} function provides an easy way to retrieve the
Function \link{xgb.gblinear.history} provides an easy way to retrieve the
outputs from this callback.
}
\examples{
@ -53,57 +52,109 @@ data.table::setDTthreads(nthread)

# In the iris dataset, it is hard to linearly separate Versicolor class from the rest
# without considering the 2nd order interactions:
x <- model.matrix(Species ~ .^2, iris)[,-1]
x <- model.matrix(Species ~ .^2, iris)[, -1]
colnames(x)
dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = nthread)
param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc",
lambda = 0.0003, alpha = 0.0003, nthread = nthread)
dtrain <- xgb.DMatrix(
  scale(x),
  label = 1 * (iris$Species == "versicolor"),
  nthread = nthread
)
param <- list(
  booster = "gblinear",
  objective = "reg:logistic",
  eval_metric = "auc",
  lambda = 0.0003,
  alpha = 0.0003,
  nthread = nthread
)

# For 'shotgun', which is a default linear updater, using high eta values may result in
# unstable behaviour in some datasets. With this simple dataset, however, the high learning
# rate does not break the convergence, but allows us to illustrate the typical pattern of
# "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 1.,
callbacks = list(xgb.cb.gblinear.history()))
bst <- xgb.train(
  param,
  dtrain,
  list(tr = dtrain),
  nrounds = 200,
  eta = 1.,
  callbacks = list(xgb.cb.gblinear.history())
)

# Extract the coefficients' path and plot them vs boosting iteration number:
coef_path <- xgb.gblinear.history(bst)
matplot(coef_path, type = 'l')
matplot(coef_path, type = "l")

# With the deterministic coordinate descent updater, it is safer to use higher learning rates.
# Will try the classical componentwise boosting which selects a single best feature per round:
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8,
updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
callbacks = list(xgb.cb.gblinear.history()))
matplot(xgb.gblinear.history(bst), type = 'l')
bst <- xgb.train(
  param,
  dtrain,
  list(tr = dtrain),
  nrounds = 200,
  eta = 0.8,
  updater = "coord_descent",
  feature_selector = "thrifty",
  top_k = 1,
  callbacks = list(xgb.cb.gblinear.history())
)
matplot(xgb.gblinear.history(bst), type = "l")
# Componentwise boosting is known to have similar effect to Lasso regularization.
# Try experimenting with various values of top_k, eta, nrounds,
# as well as different feature_selectors.

# For xgb.cv:
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
callbacks = list(xgb.cb.gblinear.history()))
bst <- xgb.cv(
  param,
  dtrain,
  nfold = 5,
  nrounds = 100,
  eta = 0.8,
  callbacks = list(xgb.cb.gblinear.history())
)
# coefficients in the CV fold #3
matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
matplot(xgb.gblinear.history(bst)[[3]], type = "l")


#### Multiclass classification:
#
dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = nthread)
param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
lambda = 0.0003, alpha = 0.0003, nthread = nthread)

param <- list(
  booster = "gblinear",
  objective = "multi:softprob",
  num_class = 3,
  lambda = 0.0003,
  alpha = 0.0003,
  nthread = nthread
)

# For the default linear updater 'shotgun' it sometimes is helpful
# to use smaller eta to reduce instability
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
callbacks = list(xgb.cb.gblinear.history()))
bst <- xgb.train(
  param,
  dtrain,
  list(tr = dtrain),
  nrounds = 50,
  eta = 0.5,
  callbacks = list(xgb.cb.gblinear.history())
)

# Will plot the coefficient paths separately for each class:
matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l')
matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l')
matplot(xgb.gblinear.history(bst, class_index = 2), type = 'l')
matplot(xgb.gblinear.history(bst, class_index = 0), type = "l")
matplot(xgb.gblinear.history(bst, class_index = 1), type = "l")
matplot(xgb.gblinear.history(bst, class_index = 2), type = "l")

# CV:
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5,
callbacks = list(xgb.cb.gblinear.history(FALSE)))
bst <- xgb.cv(
  param,
  dtrain,
  nfold = 5,
  nrounds = 70,
  eta = 0.5,
  callbacks = list(xgb.cb.gblinear.history(FALSE))
)
# 1st fold of 1st class
matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l')
matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = "l")

}
\seealso{

@ -7,12 +7,12 @@
xgb.cb.print.evaluation(period = 1, showsd = TRUE)
}
\arguments{
\item{period}{results would be printed every number of periods}
\item{period}{Results would be printed every number of periods.}

\item{showsd}{whether standard deviations should be printed (when available)}
\item{showsd}{Whether standard deviations should be printed (when available).}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}.
}
\description{
The callback function prints the result of evaluation at every \code{period} iterations.

@ -2,12 +2,12 @@
% Please edit documentation in R/callbacks.R
\name{xgb.cb.reset.parameters}
\alias{xgb.cb.reset.parameters}
\title{Callback for resetting the booster's parameters at each iteration.}
\title{Callback for resetting booster parameters at each iteration}
\usage{
xgb.cb.reset.parameters(new_params)
}
\arguments{
\item{new_params}{a list where each element corresponds to a parameter that needs to be reset.
\item{new_params}{List of parameters to be reset.
Each element's value must be either a vector of values of length \code{nrounds}
to be set at each iteration,
or a function of two parameters \code{learning_rates(iteration, nrounds)}
@ -15,10 +15,10 @@ which returns a new parameter value by using the current iteration number
and the total number of boosting rounds.}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}.
}
\description{
Callback for resetting the booster's parameters at each iteration.
Callback for resetting booster parameters at each iteration
}
\details{
Note that when training is resumed from some previous model, and a function is used to

@ -2,23 +2,22 @@
% Please edit documentation in R/callbacks.R
\name{xgb.cb.save.model}
\alias{xgb.cb.save.model}
\title{Callback for saving a model file.}
\title{Callback for saving a model file}
\usage{
xgb.cb.save.model(save_period = 0, save_name = "xgboost.ubj")
}
\arguments{
\item{save_period}{Save the model to disk after every
\code{save_period} iterations; 0 means save the model at the end.}
\item{save_period}{Save the model to disk after every \code{save_period} iterations;
0 means save the model at the end.}

\item{save_name}{The name or path for the saved model file.
It can contain a \code{\link[base]{sprintf}} formatting specifier
to include the integer iteration number in the file name.
E.g., with \code{save_name} = 'xgboost_\%04d.model',
It can contain a \code{\link[=sprintf]{sprintf()}} formatting specifier to include the integer
iteration number in the file name. E.g., with \code{save_name = 'xgboost_\%04d.model'},
the file saved at iteration 50 would be named "xgboost_0050.model".}
}
\value{
An \code{xgb.Callback} object, which can be passed to \link{xgb.train},
but \bold{not} to \link{xgb.cv}.
An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}},
but \strong{not} to \code{\link[=xgb.cv]{xgb.cv()}}.
}
\description{
This callback function allows to save an xgb-model file, either periodically

@ -10,12 +10,12 @@ xgb.config(object)
xgb.config(object) <- value
}
\arguments{
\item{object}{Object of class \code{xgb.Booster}. \bold{Will be modified in-place} when assigning to it.}
\item{object}{Object of class \code{xgb.Booster}. \strong{Will be modified in-place} when assigning to it.}

\item{value}{An R list.}
\item{value}{A list.}
}
\value{
\code{xgb.config} will return the parameters as an R list.
Parameters as a list.
}
\description{
Accessors for model parameters as JSON string
@ -25,7 +25,7 @@ Note that assignment is performed in-place on the booster C object, which unlike
of R attributes, doesn't follow typical copy-on-write semantics for assignment - i.e. all references
to the same booster will also get updated.

See \link{xgb.copy.Booster} for an example of this behavior.
See \code{\link[=xgb.copy.Booster]{xgb.copy.Booster()}} for an example of this behavior.
}
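A brief hedged sketch of the accessor pair ('bst' stands for a hypothetical booster):

cfg <- xgb.config(bst)   # parameters as a list
str(cfg, max.level = 1)  # inspect the structure before modifying anything
xgb.config(bst) <- cfg   # assignment writes through to the C object in-place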
\examples{
data(agaricus.train, package = "xgboost")

@ -16,14 +16,18 @@ functions called on that copy will not affect the \code{model} variable.
\description{
Creates a deep copy of an 'xgb.Booster' object, such that the
C object pointer contained will be a different object, and hence functions
like \link{xgb.attr} will not affect the object from which it was copied.
like \code{\link[=xgb.attr]{xgb.attr()}} will not affect the object from which it was copied.
}
\examples{
library(xgboost)

data(mtcars)

y <- mtcars$mpg
x <- mtcars[, -1]

dm <- xgb.DMatrix(x, label = y, nthread = 1)

model <- xgb.train(
  data = dm,
  params = list(nthread = 1),

@ -7,17 +7,18 @@
xgb.create.features(model, data, ...)
}
\arguments{
\item{model}{decision tree boosting model learned on the original data}
\item{model}{Decision tree boosting model learned on the original data.}

\item{data}{original data (usually provided as a \code{dgCMatrix} matrix)}
\item{data}{Original data (usually provided as a \code{dgCMatrix} matrix).}

\item{...}{currently not used}
\item{...}{Currently not used.}
}
\value{
\code{dgCMatrix} matrix including both the original data and the new features.
A \code{dgCMatrix} matrix including both the original data and the new features.
}
\description{
May improve the learning by adding new features to the training data based on the decision trees from a previously learned model.
May improve the learning by adding new features to the training data based on the
decision trees from a previously learned model.
}
\details{
This is the function inspired from the paragraph 3.1 of the paper:
@ -44,11 +45,11 @@ For example, consider the boosted tree model in Figure 1 with 2 subtrees,
where the first subtree has 3 leafs and the second 2 leafs. If an
instance ends up in leaf 2 in the first subtree and leaf 1 in
second subtree, the overall input to the linear classifier will
be the binary vector \code{[0, 1, 0, 1, 0]}, where the first 3 entries
be the binary vector \verb{[0, 1, 0, 1, 0]}, where the first 3 entries
correspond to the leaves of the first subtree and last 2 to
those of the second subtree.

\link{...}
...

We can understand boosted decision tree
based transformation as a supervised feature encoding that
@ -57,15 +58,16 @@ vector. A traversal from root node to a leaf node represents
a rule on certain features."
}
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")

dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2))

param <- list(max_depth=2, eta=1, objective='binary:logistic')
param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
nrounds = 4

bst = xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)
bst <- xgb.train(params = param, data = dtrain, nrounds = nrounds, nthread = 2)

# Model accuracy without new features
accuracy.before <- sum((predict(bst, agaricus.test$data) >= 0.5) == agaricus.test$label) /

@ -2,7 +2,7 @@
% Please edit documentation in R/xgb.dump.R
\name{xgb.dump}
\alias{xgb.dump}
\title{Dump an xgboost model in text format.}
\title{Dump an XGBoost model in text format.}
\usage{
xgb.dump(
  model,
@ -14,43 +14,51 @@ xgb.dump(
)
}
\arguments{
\item{model}{the model object.}
\item{model}{The model object.}

\item{fname}{the name of the text file where to save the model text dump.
If not provided or set to \code{NULL}, the model is returned as a \code{character} vector.}
\item{fname}{The name of the text file where to save the model text dump.
If not provided or set to \code{NULL}, the model is returned as a character vector.}

\item{fmap}{feature map file representing feature types.
See demo/ for walkthrough example in R, and
\url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt}
for example Format.}
\item{fmap}{Feature map file representing feature types. See demo/ for a walkthrough
example in R, and \url{https://github.com/dmlc/xgboost/blob/master/demo/data/featmap.txt}
to see an example of the value.}

\item{with_stats}{whether to dump some additional statistics about the splits.
\item{with_stats}{Whether to dump some additional statistics about the splits.
When this option is on, the model dump contains two additional values:
gain is the approximate loss function gain we get in each split;
cover is the sum of second order gradient in each node.}

\item{dump_format}{either 'text', 'json', or 'dot' (graphviz) format could be specified.
\item{dump_format}{Either 'text', 'json', or 'dot' (graphviz) format could be specified.

Format 'dot' for a single tree can be passed directly to packages that consume this format
for graph visualization, such as function \code{\link[DiagrammeR:grViz]{DiagrammeR::grViz()}}}
for graph visualization, such as function \code{DiagrammeR::grViz()}}

\item{...}{currently not used}
\item{...}{Currently not used}
}
\value{
If fname is not provided or set to \code{NULL} the function will return the model
as a \code{character} vector. Otherwise it will return \code{TRUE}.
as a character vector. Otherwise it will return \code{TRUE}.
}
\description{
Dump an xgboost model in text format.
Dump an XGBoost model in text format.
}
\examples{
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")

train <- agaricus.train
test <- agaricus.test
bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")

bst <- xgb.train(
  data = xgb.DMatrix(train$data, label = train$label),
  max_depth = 2,
  eta = 1,
  nthread = 2,
  nrounds = 2,
  objective = "binary:logistic"
)

# save the model in file 'xgb.model.dump'
dump_path = file.path(tempdir(), 'model.dump')
xgb.dump(bst, dump_path, with_stats = TRUE)
@ -59,7 +67,7 @@ xgb.dump(bst, dump_path, with_stats = TRUE)
print(xgb.dump(bst, with_stats = TRUE))

# print in JSON format:
cat(xgb.dump(bst, with_stats = TRUE, dump_format='json'))
cat(xgb.dump(bst, with_stats = TRUE, dump_format = "json"))

# plot first tree leveraging the 'dot' format
if (requireNamespace('DiagrammeR', quietly = TRUE)) {

@ -2,24 +2,24 @@
% Please edit documentation in R/callbacks.R
\name{xgb.gblinear.history}
\alias{xgb.gblinear.history}
\title{Extract gblinear coefficients history.}
\title{Extract gblinear coefficients history}
\usage{
xgb.gblinear.history(model, class_index = NULL)
}
\arguments{
\item{model}{either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
using the \link{xgb.cb.gblinear.history} callback, but \bold{not} a booster
loaded from \link{xgb.load} or \link{xgb.load.raw}.}
\item{model}{Either an \code{xgb.Booster} or a result of \code{\link[=xgb.cv]{xgb.cv()}}, trained
using the \link{xgb.cb.gblinear.history} callback, but \strong{not} a booster
loaded from \code{\link[=xgb.load]{xgb.load()}} or \code{\link[=xgb.load.raw]{xgb.load.raw()}}.}

\item{class_index}{zero-based class index to extract the coefficients for only that
specific class in a multinomial multiclass model. When it is NULL, all the
specific class in a multinomial multiclass model. When it is \code{NULL}, all the
coefficients are returned. Has no effect in non-multiclass models.}
}
\value{
For an \link{xgb.train} result, a matrix (either dense or sparse) with the columns
For an \code{\link[=xgb.train]{xgb.train()}} result, a matrix (either dense or sparse) with the columns
corresponding to iteration's coefficients and the rows corresponding to boosting iterations.

For an \link{xgb.cv} result, a list of such matrices is returned with the elements
For an \code{\link[=xgb.cv]{xgb.cv()}} result, a list of such matrices is returned with the elements
corresponding to CV folds.

When there is more than one coefficient per feature (e.g. multi-class classification)
@ -31,15 +31,15 @@ coefficients N+1 through 2N for the second class, and so on).
\description{
A helper function to extract the matrix of linear coefficients' history
from a gblinear model created while using the \link{xgb.cb.gblinear.history}
callback (which must be added manually as by default it's not used).
callback (which must be added manually as by default it is not used).
}
\details{
Note that this is an R-specific function that relies on R attributes that
are not saved when using xgboost's own serialization functions like \link{xgb.load}
or \link{xgb.load.raw}.
are not saved when using XGBoost's own serialization functions like \code{\link[=xgb.load]{xgb.load()}}
or \code{\link[=xgb.load.raw]{xgb.load.raw()}}.

In order for a serialized model to be accepted by this function, one must use R
serializers such as \link{saveRDS}.
serializers such as \code{\link[=saveRDS]{saveRDS()}}.
}
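A hedged round-trip sketch of the point above ('bst' is assumed to have been trained with the xgb.cb.gblinear.history() callback):

rds_path <- file.path(tempdir(), "gblinear_model.rds")
saveRDS(bst, rds_path)        # R serialization keeps the R attributes
bst2 <- readRDS(rds_path)
coef_path <- xgb.gblinear.history(bst2)  # still retrievable
# after xgb.save()/xgb.load(), by contrast, the history would be gone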
\seealso{
\link{xgb.cb.gblinear.history}, \link{coef.xgb.Booster}.

@ -13,13 +13,13 @@ xgb.get.num.boosted.rounds(model)
\item{model, x}{A fitted \code{xgb.Booster} model.}
}
\value{
The number of rounds saved in the model, as an integer.
The number of rounds saved in the model as an integer.
}
\description{
Get number of boosting rounds in a fitted booster
}
\details{
Note that setting booster parameters related to training
continuation / updates through \link{xgb.parameters<-} will reset the
continuation / updates through \code{\link[=xgb.parameters<-]{xgb.parameters<-()}} will reset the
number of rounds to zero.
}

@ -12,30 +12,33 @@ xgb.is.same.Booster(obj1, obj2)
\item{obj2}{Booster model to compare with \code{obj1}.}
}
\value{
Either \code{TRUE} or \code{FALSE} according to whether the two boosters share
the underlying C object.
Either \code{TRUE} or \code{FALSE} according to whether the two boosters share the
underlying C object.
}
\description{
Checks whether two booster objects refer to the same underlying C object.
}
\details{
As booster objects (as returned by e.g. \link{xgb.train}) contain an R 'externalptr'
As booster objects (as returned by e.g. \code{\link[=xgb.train]{xgb.train()}}) contain an R 'externalptr'
object, they don't follow typical copy-on-write semantics of other R objects - that is, if
one assigns a booster to a different variable and modifies that new variable through in-place
methods like \link{xgb.attr<-}, the modification will be applied to both the old and the new
methods like \code{\link[=xgb.attr<-]{xgb.attr<-()}}, the modification will be applied to both the old and the new
variable, unlike typical R assignments which would only modify the latter.

This function allows checking whether two booster objects share the same 'externalptr',
regardless of the R attributes that they might have.

In order to duplicate a booster in such a way that the copy wouldn't share the same
'externalptr', one can use function \link{xgb.copy.Booster}.
'externalptr', one can use function \code{\link[=xgb.copy.Booster]{xgb.copy.Booster()}}.
}
\examples{
library(xgboost)

data(mtcars)

y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])

model <- xgb.train(
  params = list(nthread = 1),
  data = xgb.DMatrix(x, label = y, nthread = 1),
@ -55,5 +58,5 @@ xgb.attr(model, "my_attr") # gets modified
xgb.attr(model_deep_copy, "my_attr") # doesn't get modified
}
\seealso{
\link{xgb.copy.Booster}
\code{\link[=xgb.copy.Booster]{xgb.copy.Booster()}}
}
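A brief sketch of the pointer semantics above, reusing the objects from this example:

model_alias <- model                          # plain assignment: same 'externalptr'
xgb.is.same.Booster(model, model_alias)       # TRUE
xgb.is.same.Booster(model, model_deep_copy)   # FALSE, thanks to xgb.copy.Booster()
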
@ -2,32 +2,32 @@
% Please edit documentation in R/xgb.load.R
\name{xgb.load}
\alias{xgb.load}
\title{Load xgboost model from binary file}
\title{Load XGBoost model from binary file}
\usage{
xgb.load(modelfile)
}
\arguments{
\item{modelfile}{the name of the binary input file.}
\item{modelfile}{The name of the binary input file.}
}
\value{
An object of \code{xgb.Booster} class.
}
\description{
Load xgboost model from the binary model file.
Load XGBoost model from binary model file.
}
\details{
The input file is expected to contain a model saved in an xgboost model format
using either \code{\link{xgb.save}} or \code{\link{xgb.cb.save.model}} in R, or using some
appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
saved from there in xgboost format, could be loaded from R.
The input file is expected to contain a model saved in an XGBoost model format
using either \code{\link[=xgb.save]{xgb.save()}} in R, or using some
appropriate methods from other XGBoost interfaces. E.g., a model trained in Python and
saved from there in XGBoost format, could be loaded from R.

Note: a model saved as an R-object, has to be loaded using corresponding R-methods,
not \code{xgb.load}.
Note: a model saved as an R object has to be loaded using corresponding R-methods,
not by \code{\link[=xgb.load]{xgb.load()}}.
}
\examples{
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")

## Keep the number of threads to 1 for examples
nthread <- 1
@ -35,6 +35,7 @@ data.table::setDTthreads(nthread)

train <- agaricus.train
test <- agaricus.test

bst <- xgb.train(
  data = xgb.DMatrix(train$data, label = train$label),
  max_depth = 2,
@ -49,5 +50,5 @@ xgb.save(bst, fname)
bst <- xgb.load(fname)
}
\seealso{
\code{\link{xgb.save}}
\code{\link[=xgb.save]{xgb.save()}}
}
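To make the R-object note above concrete, a small sketch using the \code{bst} from this example:

fname_rds <- file.path(tempdir(), "model.rds")
saveRDS(bst, fname_rds)        # R serializer
bst_r <- readRDS(fname_rds)    # must be read back with readRDS()
# xgb.load(fname_rds) would fail: the file is not an XGBoost binary format.
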
@ -2,13 +2,13 @@
% Please edit documentation in R/xgb.load.raw.R
\name{xgb.load.raw}
\alias{xgb.load.raw}
\title{Load serialised xgboost model from R's raw vector}
\title{Load serialised XGBoost model from R's raw vector}
\usage{
xgb.load.raw(buffer)
}
\arguments{
\item{buffer}{the buffer returned by xgb.save.raw}
\item{buffer}{The buffer returned by \code{\link[=xgb.save.raw]{xgb.save.raw()}}.}
}
\description{
User can generate raw memory buffer by calling xgb.save.raw
A raw memory buffer can be generated by calling \code{\link[=xgb.save.raw]{xgb.save.raw()}}.
}
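A round-trip sketch, assuming a fitted booster \code{bst} as in the other examples in this commit:

raw <- xgb.save.raw(bst, raw_format = "ubj")
bst2 <- xgb.load.raw(raw)
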
@ -13,15 +13,14 @@ xgb.model.dt.tree(
)
}
\arguments{
\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can be set through
\link{setinfo}), they will be used in the output from this function.}
\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can
be set through \code{\link[=setinfo]{setinfo()}}), they will be used in the output from this function.}

\item{text}{Character vector previously generated by the function \code{\link[=xgb.dump]{xgb.dump()}}
(called with parameter \code{with_stats = TRUE}). \code{text} takes precedence over \code{model}.}

\item{trees}{An integer vector of tree indices that should be used.
The default (\code{NULL}) uses all trees.
Useful, e.g., in multiclass classification to get only
\item{trees}{An integer vector of tree indices that should be used. The default
(\code{NULL}) uses all trees. Useful, e.g., in multiclass classification to get only
the trees of one class. \emph{Important}: the tree index in XGBoost models
is zero-based (e.g., use \code{trees = 0:4} for the first five trees).}
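A quick sketch of the zero-based \code{trees} argument, assuming a fitted multi-tree booster \code{bst}:

dt_all <- xgb.model.dt.tree(model = bst)                  # all trees
dt_first2 <- xgb.model.dt.tree(model = bst, trees = 0:1)  # first two trees only
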
@ -7,7 +7,7 @@
xgb.parameters(object) <- value
}
\arguments{
\item{object}{Object of class \code{xgb.Booster}. \bold{Will be modified in-place}.}
\item{object}{Object of class \code{xgb.Booster}. \strong{Will be modified in-place}.}

\item{value}{A list (or an object coercible to a list) with the names of parameters to set
and the elements corresponding to parameter values.}
@ -16,21 +16,22 @@ and the elements corresponding to parameter values.}
The same booster \code{object}, which gets modified in-place.
}
\description{
Only the setter for xgboost parameters is currently implemented.
Only the setter for XGBoost parameters is currently implemented.
}
\details{
Just like \link{xgb.attr}, this function will make in-place modifications
Just like \code{\link[=xgb.attr]{xgb.attr()}}, this function will make in-place modifications
on the booster object which do not follow typical R assignment semantics - that is,
all references to the same booster will also be updated, unlike assignment of R
attributes which follow copy-on-write semantics.

See \link{xgb.copy.Booster} for an example of this behavior.
See \code{\link[=xgb.copy.Booster]{xgb.copy.Booster()}} for an example of this behavior.

Be aware that setting parameters of a fitted booster related to training continuation / updates
will reset its number of rounds indicator to zero.
}
\examples{
data(agaricus.train, package = "xgboost")

train <- agaricus.train

bst <- xgb.train(
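A usage sketch for the replacement form (illustrative parameter values; note the in-place effect described in the details):

bst_alias <- bst                               # same underlying booster
xgb.parameters(bst_alias) <- list(eta = 0.5)
# `bst` now reflects the new value too, since the change is made in-place.
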
@ -18,7 +18,8 @@ xgb.plot.deepness(
)
}
\arguments{
\item{model}{Either an \code{xgb.Booster} model, or the "data.table" returned by \code{\link[=xgb.model.dt.tree]{xgb.model.dt.tree()}}.}
\item{model}{Either an \code{xgb.Booster} model, or the "data.table" returned
by \code{\link[=xgb.model.dt.tree]{xgb.model.dt.tree()}}.}

\item{which}{Which distribution to plot (see details).}


@ -67,8 +67,8 @@ Represents previously calculated feature importance as a bar graph.
}
}
\details{
The graph represents each feature as a horizontal bar of length proportional to the importance of a feature.
Features are sorted by decreasing importance.
The graph represents each feature as a horizontal bar of length proportional to the
importance of a feature. Features are sorted by decreasing importance.
It works for both "gblinear" and "gbtree" models.

When \code{rel_to_first = FALSE}, the values would be plotted as in \code{importance_matrix}.
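A sketch of \code{rel_to_first}, assuming a fitted booster \code{bst}:

imp <- xgb.importance(model = bst)
xgb.plot.importance(imp, rel_to_first = TRUE, xlab = "Relative importance")
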
@ -21,11 +21,11 @@ xgb.plot.multi.trees(
by default 5.}

\item{plot_width, plot_height}{Width and height of the graph in pixels.
The values are passed to \code{\link[DiagrammeR:render_graph]{DiagrammeR::render_graph()}}.}
The values are passed to \code{DiagrammeR::render_graph()}.}

\item{render}{Should the graph be rendered or not? The default is \code{TRUE}.}

\item{...}{currently not used.}
\item{...}{Currently not used.}
}
\value{
The value depends on the \code{render} parameter:
@ -35,7 +35,7 @@ class \code{grViz}. Similar to "ggplot" objects, it needs to be printed when not
running from the command line.
\item If \code{render = FALSE}: Graph object which is of DiagrammeR's class \code{dgr_graph}.
This could be useful if one wants to modify some of the graph attributes
before rendering the graph with \code{\link[DiagrammeR:render_graph]{DiagrammeR::render_graph()}}.
before rendering the graph with \code{DiagrammeR::render_graph()}.
}
}
\description{
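A sketch of the \code{render = FALSE} workflow, assuming a fitted tree booster \code{bst}:

g <- xgb.plot.multi.trees(bst, render = FALSE)  # DiagrammeR 'dgr_graph' object
# ...adjust graph attributes here if desired, then render:
DiagrammeR::render_graph(g)
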
@ -38,12 +38,11 @@ xgb.plot.shap(
\item{shap_contrib}{Matrix of SHAP contributions of \code{data}.
The default (\code{NULL}) computes it from \code{model} and \code{data}.}

\item{features}{Vector of column indices or feature names to plot.
When \code{NULL} (default), the \code{top_n} most important features are selected
by \code{\link[=xgb.importance]{xgb.importance()}}.}
\item{features}{Vector of column indices or feature names to plot. When \code{NULL}
(default), the \code{top_n} most important features are selected by \code{\link[=xgb.importance]{xgb.importance()}}.}

\item{top_n}{How many of the most important features (<= 100) should be selected?
By default 1 for SHAP dependence and 10 for SHAP summary).
By default 1 for SHAP dependence and 10 for SHAP summary.
Only used when \code{features = NULL}.}

\item{model}{An \code{xgb.Booster} model. Only required when \code{shap_contrib = NULL} or
@ -173,6 +172,7 @@ mbst <- xgb.train(
)
trees0 <- seq(from = 0, by = nclass, length.out = nrounds)
col <- rgb(0, 0, 1, 0.5)

xgb.plot.shap(
  x,
  model = mbst,

@ -35,12 +35,11 @@ xgb.plot.shap.summary(
\item{shap_contrib}{Matrix of SHAP contributions of \code{data}.
The default (\code{NULL}) computes it from \code{model} and \code{data}.}

\item{features}{Vector of column indices or feature names to plot.
When \code{NULL} (default), the \code{top_n} most important features are selected
by \code{\link[=xgb.importance]{xgb.importance()}}.}
\item{features}{Vector of column indices or feature names to plot. When \code{NULL}
(default), the \code{top_n} most important features are selected by \code{\link[=xgb.importance]{xgb.importance()}}.}

\item{top_n}{How many of the most important features (<= 100) should be selected?
By default 1 for SHAP dependence and 10 for SHAP summary).
By default 1 for SHAP dependence and 10 for SHAP summary.
Only used when \code{features = NULL}.}

\item{model}{An \code{xgb.Booster} model. Only required when \code{shap_contrib = NULL} or
@ -26,13 +26,14 @@ the trees of one class. \emph{Important}: the tree index in XGBoost models
is zero-based (e.g., use \code{trees = 0:2} for the first three trees).}

\item{plot_width, plot_height}{Width and height of the graph in pixels.
The values are passed to \code{\link[DiagrammeR:render_graph]{DiagrammeR::render_graph()}}.}
The values are passed to \code{DiagrammeR::render_graph()}.}

\item{render}{Should the graph be rendered or not? The default is \code{TRUE}.}

\item{show_node_id}{a logical flag for whether to show node id's in the graph.}

\item{style}{Style to use for the plot. Options are:\itemize{
\item{style}{Style to use for the plot:
\itemize{
\item \code{"xgboost"}: will use the plot style defined in the core XGBoost library,
which is shared between different interfaces through the 'dot' format. This
style was not available before version 2.1.0 in R. It always plots the trees
@ -42,13 +43,14 @@ the introduction of the standardized style from the core library. It might plot
the trees horizontally (from left to right).
}

Note that \code{style="xgboost"} is only supported when all of the following conditions are met:\itemize{
Note that \code{style="xgboost"} is only supported when all of the following conditions are met:
\itemize{
\item Only a single tree is being plotted.
\item Node IDs are not added to the graph.
\item The graph is being returned as \code{htmlwidget} (\code{render=TRUE}).
}}

\item{...}{currently not used.}
\item{...}{Currently not used.}
}
\value{
The value depends on the \code{render} parameter:
@ -58,7 +60,7 @@ class \code{grViz}. Similar to "ggplot" objects, it needs to be printed when not
running from the command line.
\item If \code{render = FALSE}: Graph object which is of DiagrammeR's class \code{dgr_graph}.
This could be useful if one wants to modify some of the graph attributes
before rendering the graph with \code{\link[DiagrammeR:render_graph]{DiagrammeR::render_graph()}}.
before rendering the graph with \code{DiagrammeR::render_graph()}.
}
}
\description{
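This chunk appears to document \code{xgb.plot.tree()}; assuming that name, a sketch of the constraints on the "xgboost" style:

# Single tree, no node IDs, render = TRUE (the default):
xgb.plot.tree(bst, trees = 0, style = "xgboost")
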
@ -2,21 +2,20 @@
% Please edit documentation in R/xgb.save.R
\name{xgb.save}
\alias{xgb.save}
\title{Save xgboost model to binary file}
\title{Save XGBoost model to binary file}
\usage{
xgb.save(model, fname)
}
\arguments{
\item{model}{Model object of \code{xgb.Booster} class.}

\item{fname}{Name of the file to write.

Note that the extension of this file name determined the serialization format to use:\itemize{
\item Extension ".ubj" will use the universal binary JSON format (recommended).
\item{fname}{Name of the file to write. Its extension determines the serialization format:
\itemize{
\item ".ubj": Use the universal binary JSON format (recommended).
This format uses binary types for e.g. floating point numbers, thereby preventing any loss
of precision when converting to a human-readable JSON text or similar.
\item Extension ".json" will use plain JSON, which is a human-readable format.
\item Extension ".deprecated" will use a \bold{deprecated} binary format. This format will
\item ".json": Use plain JSON, which is a human-readable format.
\item ".deprecated": Use \strong{deprecated} binary format. This format will
not be able to save attributes introduced after v1 of XGBoost, such as the "best_iteration"
attribute that boosters might keep, nor feature names or user-specified attributes.
\item If the format is not specified by passing one of the file extensions above, will
@ -24,26 +23,25 @@ default to UBJ.
}}
}
\description{
Save xgboost model to a file in binary or JSON format.
Save XGBoost model to a file in binary or JSON format.
}
\details{
This methods allows to save a model in an xgboost-internal binary or text format which is universal
among the various xgboost interfaces. In R, the saved model file could be read-in later
using either the \code{\link{xgb.load}} function or the \code{xgb_model} parameter
of \code{\link{xgb.train}}.
This method allows saving a model in an XGBoost-internal binary or text format which is universal
among the various XGBoost interfaces. In R, the saved model file could be read later
using either the \code{\link[=xgb.load]{xgb.load()}} function or the \code{xgb_model} parameter of \code{\link[=xgb.train]{xgb.train()}}.

Note: a model can also be saved as an R-object (e.g., by using \code{\link[base]{readRDS}}
or \code{\link[base]{save}}). However, it would then only be compatible with R, and
corresponding R-methods would need to be used to load it. Moreover, persisting the model with
\code{\link[base]{readRDS}} or \code{\link[base]{save}}) might cause compatibility problems in
future versions of XGBoost. Consult \code{\link{a-compatibility-note-for-saveRDS-save}} to learn
how to persist models in a future-proof way, i.e. to make the model accessible in future
Note: a model can also be saved as an R object (e.g., by using \code{\link[=readRDS]{readRDS()}}
or \code{\link[=save]{save()}}). However, it would then only be compatible with R, and
corresponding R methods would need to be used to load it. Moreover, persisting the model with
\code{\link[=readRDS]{readRDS()}} or \code{\link[=save]{save()}} might cause compatibility problems in
future versions of XGBoost. Consult \link{a-compatibility-note-for-saveRDS-save} to learn
how to persist models in a future-proof way, i.e., to make the model accessible in future
releases of XGBoost.
}
\examples{
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")

## Keep the number of threads to 1 for examples
nthread <- 1
@ -51,6 +49,7 @@ data.table::setDTthreads(nthread)

train <- agaricus.train
test <- agaricus.test

bst <- xgb.train(
  data = xgb.DMatrix(train$data, label = train$label),
  max_depth = 2,
@ -59,10 +58,11 @@ bst <- xgb.train(
  nrounds = 2,
  objective = "binary:logistic"
)

fname <- file.path(tempdir(), "xgb.ubj")
xgb.save(bst, fname)
bst <- xgb.load(fname)
}
\seealso{
\code{\link{xgb.load}}
\code{\link[=xgb.load]{xgb.load()}}
}
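A sketch of the extension-driven format choice described above:

xgb.save(bst, file.path(tempdir(), "model.ubj"))   # UBJSON (recommended)
xgb.save(bst, file.path(tempdir(), "model.json"))  # human-readable JSON
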
@ -2,37 +2,44 @@
% Please edit documentation in R/xgb.save.raw.R
\name{xgb.save.raw}
\alias{xgb.save.raw}
\title{Save xgboost model to R's raw vector,
user can call xgb.load.raw to load the model back from raw vector}
\title{Save XGBoost model to R's raw vector}
\usage{
xgb.save.raw(model, raw_format = "ubj")
}
\arguments{
\item{model}{the model object.}
\item{model}{The model object.}

\item{raw_format}{The format for encoding the booster. Available options are
\item{raw_format}{The format for encoding the booster:
\itemize{
\item \code{json}: Encode the booster into JSON text document.
\item \code{ubj}: Encode the booster into Universal Binary JSON.
\item \code{deprecated}: Encode the booster into old customized binary format.
\item "json": Encode the booster into JSON text document.
\item "ubj": Encode the booster into Universal Binary JSON.
\item "deprecated": Encode the booster into old customized binary format.
}}
}
\description{
Save xgboost model from xgboost or xgb.train
Save XGBoost model from \code{\link[=xgboost]{xgboost()}} or \code{\link[=xgb.train]{xgb.train()}}.
Call \code{\link[=xgb.load.raw]{xgb.load.raw()}} to load the model back from raw vector.
}
\examples{
\dontshow{RhpcBLASctl::omp_set_num_threads(1)}
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")

## Keep the number of threads to 2 for examples
nthread <- 2
## Keep the number of threads to 1 for examples
nthread <- 1
data.table::setDTthreads(nthread)

train <- agaricus.train
test <- agaricus.test
bst <- xgb.train(data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = nthread, nrounds = 2,objective = "binary:logistic")

bst <- xgb.train(
  data = xgb.DMatrix(train$data, label = train$label),
  max_depth = 2,
  eta = 1,
  nthread = nthread,
  nrounds = 2,
  objective = "binary:logistic"
)

raw <- xgb.save.raw(bst)
bst <- xgb.load.raw(raw)
@ -18,10 +18,9 @@ xgb.slice.Booster(
\item{model, x}{A fitted \code{xgb.Booster} object, which is to be sliced by taking only a subset
of its rounds / iterations.}

\item{start}{Start of the slice (base-1 and inclusive, like R's \link{seq}).}

\item{end}{End of the slice (base-1 and inclusive, like R's \link{seq}).
\item{start}{Start of the slice (base-1 and inclusive, like R's \code{\link[=seq]{seq()}}).}

\item{end}{End of the slice (base-1 and inclusive, like R's \code{\link[=seq]{seq()}}).
Passing a value of zero here is equivalent to passing the full number of rounds in the
booster object.}

@ -43,8 +42,10 @@ the resulting object.
}
\examples{
data(mtcars)

y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])

dm <- xgb.DMatrix(x, label = y, nthread = 1)
model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
model_slice <- xgb.slice.Booster(model, 1, 3)
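A sketch of the base-1, inclusive slicing semantics, reusing the objects from this example:

xgb.get.num.boosted.rounds(model)        # 5
xgb.get.num.boosted.rounds(model_slice)  # 3 (rounds 1 through 3 were kept)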