[R] Rename watchlist -> evals (#10032)

david-cortes 2024-03-09 23:48:06 +01:00 committed by GitHub
parent 2c13f90384
commit b023a253b4
28 changed files with 218 additions and 221 deletions

View File

@ -56,10 +56,10 @@
#' For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
#' \item `dtrain`: The training data for the fold (as an `xgb.DMatrix` object).
#' \item `bst`: The `xgb.Booster` object for the fold.
#' \item `watchlist`: A list with two DMatrices, with names `train` and `test`
#' \item `evals`: A list containing two DMatrices, with names `train` and `test`
#' (`test` is the held-out data for the fold).
#' \item `index`: The indices of the hold-out data for that fold (base-1 indexing),
#' from which the `test` entry in the watchlist was obtained.
#' from which the `test` entry in `evals` was obtained.
#' }
#'
#' This object should \bold{not} be in-place modified in ways that conflict with the
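A minimal sketch of how a callback for xgb.cv would reach a fold's contents after this rename (assuming `model` is the list of folds described above):

# first fold, structured as itemized above
fd <- model[[1L]]
held_out <- fd$evals$test   # held-out DMatrix (formerly fd$watchlist$test)
rows_used <- fd$index       # base-1 row indices from which 'test' was built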
@ -78,7 +78,7 @@
#' Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
#' folds can be found in the `model` object.
#'
#' \item watchlist The evaluation watchlist, as passed under argument `watchlist` to
#' \item evals The evaluation data, as passed under argument `evals` to
#' \link{xgb.train}.
#'
#' For \link{xgb.cv}, this will always be `NULL`.
@ -101,15 +101,15 @@
#' \item iteration Index of the iteration that is being executed (the first iteration
#' equals parameter `begin_iteration`; each subsequent one increments it by 1).
#'
#' \item iter_feval Evaluation metrics for the `watchlist` that was supplied, either
#' \item iter_feval Evaluation metrics for `evals` that were supplied, either
#' determined by the objective, or by parameter `feval`.
#'
#' For \link{xgb.train}, this will be a named vector with one entry per element in
#' `watchlist`, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
#' example, if `watchlist` contains an entry named "tr" and the metric is "rmse",
#' `evals`, where the names are determined as 'evals name' + '-' + 'metric name' - for
#' example, if `evals` contains an entry named "tr" and the metric is "rmse",
#' this will be a one-element vector with name "tr-rmse".
#'
#' For \link{xgb.cv}, this will be a 2d matrix with dimensions `[length(watchlist), nfolds]`,
#' For \link{xgb.cv}, this will be a 2d matrix with dimensions `[length(evals), nfolds]`,
#' where the row names will follow the same naming logic as the one-dimensional vector
#' that is passed in \link{xgb.train}.
#'
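A minimal sketch of the `iter_feval` shapes described above (names follow the new `evals` naming; values are illustrative):

# xgb.train with evals = list(tr = dtrain) and metric "rmse":
#   iter_feval is c("tr-rmse" = 0.25)
# xgb.cv with one entry in 'evals' and nfold = 3:
#   iter_feval is a 1 x 3 matrix; its row name follows the same "name-metric" rule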
@ -169,18 +169,18 @@
#' }
#' @examples
#' # Example constructing a custom callback that calculates
#' # squared error on the training data, without a watchlist,
#' # squared error on the training data (no separate test set),
#' # and outputs the per-iteration results.
#' ssq_callback <- xgb.Callback(
#' cb_name = "ssq",
#' f_before_training = function(env, model, data, watchlist,
#' f_before_training = function(env, model, data, evals,
#' begin_iteration, end_iteration) {
#' # A vector to keep track of a number at each iteration
#' env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
#' },
#' f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
#' f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
#' # This calculates the sum of squared errors on the training data.
#' # Note that this can be better done by passing a 'watchlist' entry,
#' # Note that this can be better done by passing an 'evals' entry,
#' # but this demonstrates a way in which callbacks can be structured.
#' pred <- predict(model, data)
#' err <- pred - getinfo(data, "label")
@ -196,7 +196,7 @@
#' # A return value of 'TRUE' here would signal to finalize the training
#' return(FALSE)
#' },
#' f_after_training = function(env, model, data, watchlist, iteration,
#' f_after_training = function(env, model, data, evals, iteration,
#' final_feval, prev_cb_res) {
#' return(env$logs)
#' }
@ -220,10 +220,10 @@
xgb.Callback <- function(
cb_name = "custom_callback",
env = new.env(),
f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) NULL,
f_before_iter = function(env, model, data, watchlist, iteration) NULL,
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL,
f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) NULL
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) NULL,
f_before_iter = function(env, model, data, evals, iteration) NULL,
f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) NULL
) {
stopifnot(is.null(f_before_training) || is.function(f_before_training))
stopifnot(is.null(f_before_iter) || is.function(f_before_iter))
@ -251,7 +251,7 @@ xgb.Callback <- function(
callbacks,
model,
data,
watchlist,
evals,
begin_iteration,
end_iteration
) {
@ -261,7 +261,7 @@ xgb.Callback <- function(
callback$env,
model,
data,
watchlist,
evals,
begin_iteration,
end_iteration
)
@ -273,7 +273,7 @@ xgb.Callback <- function(
callbacks,
model,
data,
watchlist,
evals,
iteration
) {
if (!length(callbacks)) {
@ -287,7 +287,7 @@ xgb.Callback <- function(
cb$env,
model,
data,
watchlist,
evals,
iteration
)
if (!NROW(should_stop)) {
@ -304,7 +304,7 @@ xgb.Callback <- function(
callbacks,
model,
data,
watchlist,
evals,
iteration,
iter_feval
) {
@ -319,7 +319,7 @@ xgb.Callback <- function(
cb$env,
model,
data,
watchlist,
evals,
iteration,
iter_feval
)
@ -337,7 +337,7 @@ xgb.Callback <- function(
callbacks,
model,
data,
watchlist,
evals,
iteration,
final_feval,
prev_cb_res
@ -355,7 +355,7 @@ xgb.Callback <- function(
cb$env,
model,
data,
watchlist,
evals,
iteration,
final_feval,
getElement(old_cb_res, cb$cb_name)
@ -428,7 +428,7 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
env = as.environment(list(period = period, showsd = showsd, is_first_call = TRUE)),
f_before_training = NULL,
f_before_iter = NULL,
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
if (is.null(iter_feval)) {
return(FALSE)
}
@ -439,7 +439,7 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
env$is_first_call <- FALSE
return(FALSE)
},
f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
if (is.null(final_feval)) {
return(NULL)
}
@ -453,7 +453,7 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
#' @title Callback for logging the evaluation history
#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' @details This callback creates a table with per-iteration evaluation metrics (see parameters
#' `watchlist` and `feval` in \link{xgb.train}).
#' `evals` and `feval` in \link{xgb.train}).
#' @details
#' Note: in the column names of the final data.table, the dash '-' character is replaced with
#' the underscore '_' in order to make the column names more like regular R identifiers.
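A minimal sketch of that column naming, assuming a booster `bst` trained with `evals = list(train = dtrain, test = dtest)` and `eval_metric = "rmse"`:

# the logged metrics "train-rmse" and "test-rmse" become underscore column names:
attributes(bst)$evaluation_log   # data.table with columns: iter, train_rmse, test_rmse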
@ -462,18 +462,18 @@ xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
xgb.cb.evaluation.log <- function() {
xgb.Callback(
cb_name = "evaluation_log",
f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
env$evaluation_log <- vector("list", end_iteration - begin_iteration + 1)
env$next_log <- 1
},
f_before_iter = NULL,
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
tmp <- .summarize.feval(iter_feval, TRUE)
env$evaluation_log[[env$next_log]] <- list(iter = iteration, metrics = tmp$feval, sds = tmp$stdev)
env$next_log <- env$next_log + 1
return(FALSE)
},
f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
if (!NROW(env$evaluation_log)) {
return(prev_cb_res)
}
@ -543,7 +543,7 @@ xgb.cb.reset.parameters <- function(new_params) {
xgb.Callback(
cb_name = "reset_parameters",
env = as.environment(list(new_params = new_params)),
f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
env$end_iteration <- end_iteration
pnames <- gsub(".", "_", names(env$new_params), fixed = TRUE)
@ -560,7 +560,7 @@ xgb.cb.reset.parameters <- function(new_params) {
}
}
},
f_before_iter = function(env, model, data, watchlist, iteration) {
f_before_iter = function(env, model, data, evals, iteration) {
pars <- lapply(env$new_params, function(p) {
if (is.function(p)) {
return(p(iteration, env$end_iteration))
@ -589,9 +589,9 @@ xgb.cb.reset.parameters <- function(new_params) {
#' @param maximize Whether to maximize the evaluation metric.
#' @param metric_name The name of an evaluation column to use as a criteria for early
#' stopping. If not set, the last column would be used.
#' Let's say the test data in \code{watchlist} was labelled as \code{dtest},
#' Let's say the test data in \code{evals} was labelled as \code{dtest},
#' and one wants to use the AUC in test data for early stopping regardless of where
#' it is in the \code{watchlist}, then one of the following would need to be set:
#' it is in \code{evals}, then one of the following would need to be set:
#' \code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
#' All dash '-' characters in metric names are considered equivalent to '_'.
#' @param verbose Whether to print the early stopping information.
@ -615,7 +615,7 @@ xgb.cb.reset.parameters <- function(new_params) {
#' base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
#' through \link{xgb.attr} or \link{xgb.attributes}.
#'
#' At least one data element is required in the evaluation watchlist for early stopping to work.
#' At least one dataset is required in `evals` for early stopping to work.
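A minimal usage sketch, assuming `param` requests a "logloss" eval metric and that `dtrain` and `dtest` already exist, mirroring this package's tests:

bst <- xgb.train(param, dtrain, nrounds = 20, evals = list(test = dtest),
                 callbacks = list(xgb.cb.early.stop(stopping_rounds = 3,
                                                    maximize = FALSE,
                                                    metric_name = "test-logloss")))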
#' @export
xgb.cb.early.stop <- function(
stopping_rounds,
@ -642,15 +642,15 @@ xgb.cb.early.stop <- function(
stopped_by_max_rounds = FALSE
)
),
f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
if (inherits(model, "xgb.Booster") && !length(watchlist)) {
stop("For early stopping, watchlist must have at least one element")
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
if (inherits(model, "xgb.Booster") && !length(evals)) {
stop("For early stopping, 'evals' must have at least one element")
}
env$begin_iteration <- begin_iteration
return(NULL)
},
f_before_iter = function(env, model, data, watchlist, iteration) NULL,
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
f_before_iter = function(env, model, data, evals, iteration) NULL,
f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
sds <- NULL
if (NCOL(iter_feval) > 1) {
tmp <- .summarize.feval(iter_feval, TRUE)
@ -729,7 +729,7 @@ xgb.cb.early.stop <- function(
}
return(FALSE)
},
f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
if (inherits(model, "xgb.Booster") && !env$keep_all_iter && env$best_iteration < iteration) {
# Note: it loses the attributes after being sliced,
# so they have to be re-assigned afterwards.
@ -798,18 +798,18 @@ xgb.cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
xgb.Callback(
cb_name = "save_model",
env = as.environment(list(save_period = save_period, save_name = save_name, last_save = 0)),
f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
env$begin_iteration <- begin_iteration
},
f_before_iter = NULL,
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
if (env$save_period > 0 && (iteration - env$begin_iteration) %% env$save_period == 0) {
.save.model.w.formatted.name(model, env$save_name, iteration)
env$last_save <- iteration
}
return(FALSE)
},
f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
if (env$save_period == 0 && iteration > env$last_save) {
.save.model.w.formatted.name(model, env$save_name, iteration)
}
@ -840,19 +840,19 @@ xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
xgb.Callback(
cb_name = "cv_predict",
env = as.environment(list(save_models = save_models, outputmargin = outputmargin)),
f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
if (inherits(model, "xgb.Booster")) {
stop("'cv.predict' callback is only for 'xgb.cv'.")
}
},
f_before_iter = NULL,
f_after_iter = NULL,
f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
pred <- NULL
for (fd in model) {
pr <- predict(
fd$bst,
fd$watchlist[[2L]],
fd$evals[[2L]],
outputmargin = env$outputmargin,
reshape = TRUE
)
@ -1002,7 +1002,7 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
xgb.Callback(
cb_name = "gblinear_history",
env = as.environment(list(sparse = sparse)),
f_before_training = function(env, model, data, watchlist, begin_iteration, end_iteration) {
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
if (!inherits(model, "xgb.Booster")) {
model <- model[[1L]]$bst
}
@ -1013,7 +1013,7 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
env$next_idx <- 1
},
f_before_iter = NULL,
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
if (inherits(model, "xgb.Booster")) {
coef_this <- .extract.coef(model, env$sparse)
} else {
@ -1023,7 +1023,7 @@ xgb.cb.gblinear.history <- function(sparse = FALSE) {
env$next_idx <- env$next_idx + 1
return(FALSE)
},
f_after_training = function(env, model, data, watchlist, iteration, final_feval, prev_cb_res) {
f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
# in case of early stopping
if (env$next_idx <= length(env$coef_hist)) {
env$coef_hist <- head(env$coef_hist, env$next_idx - 1)

View File

@ -193,20 +193,20 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) {
# Evaluate one iteration.
# Returns a named vector of evaluation metrics
# with the names in a 'datasetname-metricname' format.
xgb.iter.eval <- function(bst, watchlist, iter, feval) {
xgb.iter.eval <- function(bst, evals, iter, feval) {
handle <- xgb.get.handle(bst)
if (length(watchlist) == 0)
if (length(evals) == 0)
return(NULL)
evnames <- names(watchlist)
evnames <- names(evals)
if (is.null(feval)) {
msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), watchlist, as.list(evnames))
msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), evals, as.list(evnames))
mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
} else {
res <- sapply(seq_along(watchlist), function(j) {
w <- watchlist[[j]]
res <- sapply(seq_along(evals), function(j) {
w <- evals[[j]]
## predict using all trees
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
eval_res <- feval(preds, w)
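A sketch of the return shape (illustrative values), following the 'datasetname-metricname' format noted above:

# xgb.iter.eval(bst = bst, evals = list(train = dtrain, eval = dtest), iter = 0, feval = NULL)
# -> c("train-logloss" = 0.4826, "eval-logloss" = 0.4911)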

View File

@ -71,7 +71,6 @@
#' new.dtest <- xgb.DMatrix(
#' data = new.features.test, label = agaricus.test$label, nthread = 2
#' )
#' watchlist <- list(train = new.dtrain)
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
#'
#' # Model accuracy with new features

View File

@ -215,7 +215,7 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
modelfile = NULL
)
bst <- bst$bst
list(dtrain = dtrain, bst = bst, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
list(dtrain = dtrain, bst = bst, evals = list(train = dtrain, test = dtest), index = folds[[k]])
})
# extract parameters that can affect the relationship b/w #trees and #iterations
@ -254,7 +254,7 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
)
xgb.iter.eval(
bst = fd$bst,
watchlist = fd$watchlist,
evals = fd$evals,
iter = iteration - 1,
feval = feval
)

View File

@ -114,13 +114,13 @@
#' @param data training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input.
#' \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.
#' @param nrounds max number of boosting iterations.
#' @param watchlist named list of xgb.DMatrix datasets to use for evaluating model performance.
#' @param evals Named list of `xgb.DMatrix` datasets to use for evaluating model performance.
#' Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
#' of these datasets during each boosting iteration, and stored in the end as a field named
#' \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
#' \code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
#' printed out during the training.
#' E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
#' E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows tracking
#' the performance of each round's model on mat1 and mat2.
#' @param obj customized objective function. Returns gradient and second order
#' gradient with given prediction and dtrain.
@ -171,7 +171,7 @@
#' @details
#' These are the training functions for \code{xgboost}.
#'
#' The \code{xgb.train} interface supports advanced features such as \code{watchlist},
#' The \code{xgb.train} interface supports advanced features such as \code{evals},
#' customized objective and evaluation metric functions; it is therefore more flexible
#' than the \code{xgboost} interface.
#'
@ -209,7 +209,7 @@
#' \itemize{
#' \item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
#' and the \code{print_every_n} parameter is passed to it.
#' \item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present.
#' \item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
#' \item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
#' \item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
#' }
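Per the list above, no explicit callback is needed just to record metrics; a minimal sketch (assuming `param` and `dtrain` exist):

# evaluation_log is recorded automatically because 'evals' is present:
bst <- xgb.train(param, dtrain, nrounds = 2, evals = list(train = dtrain), verbose = 0)
attributes(bst)$evaluation_log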
@ -254,12 +254,12 @@
#' dtest <- with(
#' agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
#' watchlist <- list(train = dtrain, eval = dtest)
#' evals <- list(train = dtrain, eval = dtest)
#'
#' ## A simple xgb.train example:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
#'
#' ## An xgb.train example where custom objective and evaluation metric are
#' ## used:
@ -280,15 +280,15 @@
#' # as 'objective' and 'eval_metric' parameters in the params list:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = logregobj, eval_metric = evalerror)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
#'
#' # or through the ... arguments:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread)
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
#' objective = logregobj, eval_metric = evalerror)
#'
#' # or as dedicated 'obj' and 'feval' parameters of xgb.train:
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
#' obj = logregobj, feval = evalerror)
#'
#'
@ -296,11 +296,11 @@
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' my_etas <- list(eta = c(0.5, 0.1))
#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
#' callbacks = list(xgb.cb.reset.parameters(my_etas)))
#'
#' ## Early stopping:
#' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
#' bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
#' early_stopping_rounds = 3)
#'
#' ## An 'xgboost' interface example:
@ -311,7 +311,7 @@
#'
#' @rdname xgb.train
#' @export
xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
xgb.train <- function(params = list(), data, nrounds, evals = list(),
obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
early_stopping_rounds = NULL, maximize = NULL,
save_period = NULL, save_name = "xgboost.model",
@ -324,17 +324,17 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
check.custom.obj()
check.custom.eval()
# data & watchlist checks
# data & evals checks
dtrain <- data
if (!inherits(dtrain, "xgb.DMatrix"))
stop("second argument dtrain must be xgb.DMatrix")
if (length(watchlist) > 0) {
if (typeof(watchlist) != "list" ||
!all(vapply(watchlist, inherits, logical(1), what = 'xgb.DMatrix')))
stop("watchlist must be a list of xgb.DMatrix elements")
evnames <- names(watchlist)
if (length(evals) > 0) {
if (typeof(evals) != "list" ||
!all(vapply(evals, inherits, logical(1), what = 'xgb.DMatrix')))
stop("'evals' must be a list of xgb.DMatrix elements")
evnames <- names(evals)
if (is.null(evnames) || any(evnames == ""))
stop("each element of the watchlist must have a name tag")
stop("each element of 'evals' must have a name tag")
}
# Handle multiple evaluation metrics given as a list
for (m in params$eval_metric) {
@ -370,8 +370,8 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
if (verbose && !("print_evaluation" %in% cb_names)) {
callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n))
}
# evaluation log callback: it is automatically enabled when watchlist is provided
if (length(watchlist) && !("evaluation_log" %in% cb_names)) {
# evaluation log callback: it is automatically enabled when 'evals' is provided
if (length(evals) && !("evaluation_log" %in% cb_names)) {
callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
}
# Model saving callback
@ -385,7 +385,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
# Construct a booster (either a new one or load from xgb_model)
bst <- xgb.Booster(
params = params,
cachelist = append(watchlist, dtrain),
cachelist = append(evals, dtrain),
modelfile = xgb_model
)
niter_init <- bst$niter
@ -407,7 +407,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
callbacks,
bst,
dtrain,
watchlist,
evals,
begin_iteration,
end_iteration
)
@ -419,7 +419,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
callbacks,
bst,
dtrain,
watchlist,
evals,
iteration
)
@ -431,10 +431,10 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
)
bst_evaluation <- NULL
if (length(watchlist) > 0) {
if (length(evals) > 0) {
bst_evaluation <- xgb.iter.eval(
bst = bst,
watchlist = watchlist,
evals = evals,
iter = iteration - 1,
feval = feval
)
@ -444,7 +444,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
callbacks,
bst,
dtrain,
watchlist,
evals,
iteration,
bst_evaluation
)
@ -456,7 +456,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
callbacks,
bst,
dtrain,
watchlist,
evals,
iteration,
bst_evaluation
)

View File

@ -18,9 +18,9 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
nthread = merged$nthread
)
watchlist <- list(train = dtrain)
evals <- list(train = dtrain)
bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print_every_n = print_every_n,
bst <- xgb.train(params, dtrain, nrounds, evals, verbose = verbose, print_every_n = print_every_n,
early_stopping_rounds = early_stopping_rounds, maximize = maximize,
save_period = save_period, save_name = save_name,
xgb_model = xgb_model, callbacks = callbacks, ...)

View File

@ -74,17 +74,17 @@ print(paste("sum(abs(pred3-pred))=", sum(abs(pred3 - pred))))
# to use advanced features, we need to put data in xgb.DMatrix
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtest <- xgb.DMatrix(data = test$data, label = test$label)
#---------------Using watchlist----------------
# watchlist is a list of xgb.DMatrix, each of them is tagged with name
watchlist <- list(train = dtrain, test = dtest)
# to train with watchlist, use xgb.train, which contains more advanced features
# watchlist allows us to monitor the evaluation result on all data in the list
print("Train xgboost using xgb.train with watchlist")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
#---------------Using an evaluation set----------------
# 'evals' is a named list of xgb.DMatrix objects; each entry is tagged with a name
evals <- list(train = dtrain, test = dtest)
# to train with an evaluation set, use xgb.train, which supports more advanced features
# the 'evals' argument lets us monitor evaluation results on every dataset in the list
print("Train xgboost using xgb.train with evaluation data")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
nthread = 2, objective = "binary:logistic")
# we can change evaluation metrics, or use multiple evaluation metrics
print("train xgboost using xgb.train with watchlist, watch logloss and error")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
print("train xgboost using xgb.train with evaluation data, watch logloss and error")
bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
eval_metric = "error", eval_metric = "logloss",
nthread = 2, objective = "binary:logistic")
@ -92,7 +92,7 @@ bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist =
xgb.DMatrix.save(dtrain, "dtrain.buffer")
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
nthread = 2, objective = "binary:logistic")
# information can be extracted from xgb.DMatrix using getinfo
label <- getinfo(dtest, "label")

View File

@ -5,14 +5,14 @@ data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
###
# advanced: start from an initial base prediction
#
print('start running example to start from an initial prediction')
# train xgboost for 1 round
param <- list(max_depth = 2, eta = 1, nthread = 2, objective = 'binary:logistic')
bst <- xgb.train(param, dtrain, 1, watchlist)
bst <- xgb.train(param, dtrain, 1, evals)
# Note: we need the margin value instead of transformed prediction in set_base_margin
# do predict with output_margin=TRUE, will always give you margin values before logistic transformation
ptrain <- predict(bst, dtrain, outputmargin = TRUE)
@ -23,4 +23,4 @@ setinfo(dtrain, "base_margin", ptrain)
setinfo(dtest, "base_margin", ptest)
print('this is the result of boosting from the initial prediction')
bst <- xgb.train(params = param, data = dtrain, nrounds = 1, watchlist = watchlist)
bst <- xgb.train(params = param, data = dtrain, nrounds = 1, evals = evals)

View File

@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: for customized objective function, we leave objective as default
# note: what we are getting is margin value in prediction
# you must know what you are doing
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
num_round <- 2
# user-defined objective function: given predictions, returns gradient and second-order gradient
@ -38,7 +38,7 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
print('start training with user customized objective')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)
#
# there can be cases where you want additional information
@ -62,4 +62,4 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
print('start training with user customized objective, with additional attributes in DMatrix')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)

View File

@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: what we are getting is margin value in prediction
# you must know what you are doing
param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0)
watchlist <- list(eval = dtest)
evals <- list(eval = dtest)
num_round <- 20
# user-defined objective function: given predictions, returns gradient and second-order gradient
# this is log-likelihood loss
@ -32,7 +32,7 @@ evalerror <- function(preds, dtrain) {
}
print('start training with early stopping setting')
bst <- xgb.train(param, dtrain, num_round, watchlist,
bst <- xgb.train(param, dtrain, num_round, evals,
objective = logregobj, eval_metric = evalerror, maximize = FALSE,
early_stopping_rounds = 3)
bst <- xgb.cv(param, dtrain, num_round, nfold = 5,

View File

@ -25,9 +25,9 @@ param <- list(objective = "binary:logistic", booster = "gblinear",
##
# the rest of settings are the same
##
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
num_round <- 2
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)
ypred <- predict(bst, dtest)
labels <- getinfo(dtest, 'label')
cat('error of preds=', mean(as.numeric(ypred > 0.5) != labels), '\n')

View File

@ -23,7 +23,7 @@ y <- rbinom(N, 1, plogis(m))
tr <- sample.int(N, N * 0.75)
dtrain <- xgb.DMatrix(X[tr, ], label = y[tr])
dtest <- xgb.DMatrix(X[-tr, ], label = y[-tr])
wl <- list(train = dtrain, test = dtest)
evals <- list(train = dtrain, test = dtest)
# An example of running 'gpu_hist' algorithm
# which is
@ -35,11 +35,11 @@ wl <- list(train = dtrain, test = dtest)
param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4,
max_bin = 64, tree_method = 'gpu_hist')
pt <- proc.time()
bst_gpu <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
bst_gpu <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
proc.time() - pt
# Compare to the 'hist' algorithm:
param$tree_method <- 'hist'
pt <- proc.time()
bst_hist <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
bst_hist <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
proc.time() - pt

View File

@ -6,11 +6,11 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
nrounds <- 2
# training the model for two rounds
bst <- xgb.train(param, dtrain, nrounds, nthread = 2, watchlist)
bst <- xgb.train(param, dtrain, nrounds, nthread = 2, evals = evals)
cat('start testing prediction from first n trees\n')
labels <- getinfo(dtest, 'label')

View File

@ -43,7 +43,6 @@ colnames(new.features.test) <- colnames(new.features.train)
# learning with new features
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
watchlist <- list(train = new.dtrain)
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
# Model accuracy with new features

View File

@ -39,7 +39,7 @@ bst <- xgb.train(
data = d_train,
params = params,
maximize = FALSE,
watchlist = list(train = d_train),
evals = list(train = d_train),
nrounds = 20)
var_imp <- xgb.importance(attr(x, 'Dimnames')[[2]], model = bst)

View File

@ -7,11 +7,11 @@
xgb.Callback(
cb_name = "custom_callback",
env = new.env(),
f_before_training = function(env, model, data, watchlist, begin_iteration,
end_iteration) NULL,
f_before_iter = function(env, model, data, watchlist, iteration) NULL,
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL,
f_after_training = function(env, model, data, watchlist, iteration, final_feval,
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration)
NULL,
f_before_iter = function(env, model, data, evals, iteration) NULL,
f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
f_after_training = function(env, model, data, evals, iteration, final_feval,
prev_cb_res) NULL
)
}
@ -82,10 +82,10 @@ not be kept after the model fitting function terminates (see parameter \code{f_a
For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
\item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
\item \code{bst}: The \code{xgb.Booster} object for the fold.
\item \code{watchlist}: A list with two DMatrices, with names \code{train} and \code{test}
\item \code{evals}: A list containing two DMatrices, with names \code{train} and \code{test}
(\code{test} is the held-out data for the fold).
\item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
from which the \code{test} entry in the watchlist was obtained.
from which the \code{test} entry in \code{evals} was obtained.
}
This object should \bold{not} be in-place modified in ways that conflict with the
@ -104,7 +104,7 @@ For keeping variables across iterations, it's recommended to use \code{env} inst
Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
folds can be found in the \code{model} object.
\item watchlist The evaluation watchlist, as passed under argument \code{watchlist} to
\item evals The evaluation data, as passed under argument \code{evals} to
\link{xgb.train}.
For \link{xgb.cv}, this will always be \code{NULL}.
@ -127,15 +127,15 @@ example by using the early stopping callback \link{xgb.cb.early.stop}.
\item iteration Index of the iteration that is being executed (the first iteration
equals parameter \code{begin_iteration}; each subsequent one increments it by 1).
\item iter_feval Evaluation metrics for the \code{watchlist} that was supplied, either
\item iter_feval Evaluation metrics for \code{evals} that were supplied, either
determined by the objective, or by parameter \code{feval}.
For \link{xgb.train}, this will be a named vector with one entry per element in
\code{watchlist}, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
example, if \code{watchlist} contains an entry named "tr" and the metric is "rmse",
\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for
example, if \code{evals} contains an entry named "tr" and the metric is "rmse",
this will be a one-element vector with name "tr-rmse".
For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(watchlist), nfolds]},
For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
where the row names will follow the same naming logic as the one-dimensional vector
that is passed in \link{xgb.train}.
@ -187,18 +187,18 @@ the order in which the callbacks are passed to the model fitting function.
}
\examples{
# Example constructing a custom callback that calculates
# squared error on the training data, without a watchlist,
# squared error on the training data (no separate test set),
# and outputs the per-iteration results.
ssq_callback <- xgb.Callback(
cb_name = "ssq",
f_before_training = function(env, model, data, watchlist,
f_before_training = function(env, model, data, evals,
begin_iteration, end_iteration) {
# A vector to keep track of a number at each iteration
env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
},
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
# This calculates the sum of squared errors on the training data.
# Note that this can be better done by passing a 'watchlist' entry,
# Note that this can be better done by passing an 'evals' entry,
# but this demonstrates a way in which callbacks can be structured.
pred <- predict(model, data)
err <- pred - getinfo(data, "label")
@ -214,7 +214,7 @@ ssq_callback <- xgb.Callback(
# A return value of 'TRUE' here would signal to finalize the training
return(FALSE)
},
f_after_training = function(env, model, data, watchlist, iteration,
f_after_training = function(env, model, data, evals, iteration,
final_feval, prev_cb_res) {
return(env$logs)
}

View File

@ -20,9 +20,9 @@ the evaluation metric in order to stop the training.}
\item{metric_name}{The name of an evaluation column to use as a criteria for early
stopping. If not set, the last column would be used.
Let's say the test data in \code{watchlist} was labelled as \code{dtest},
Let's say the test data in \code{evals} was labelled as \code{dtest},
and one wants to use the AUC in test data for early stopping regardless of where
it is in the \code{watchlist}, then one of the following would need to be set:
it is in \code{evals}, then one of the following would need to be set:
\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
All dash '-' characters in metric names are considered equivalent to '_'.}
@ -51,5 +51,5 @@ condition occurred. Note that the \code{best_iteration} that is stored under R a
base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
through \link{xgb.attr} or \link{xgb.attributes}.
At least one data element is required in the evaluation watchlist for early stopping to work.
At least one dataset is required in \code{evals} for early stopping to work.
}

View File

@ -14,7 +14,7 @@ Callback for logging the evaluation history
}
\details{
This callback creates a table with per-iteration evaluation metrics (see parameters
\code{watchlist} and \code{feval} in \link{xgb.train}).
\code{evals} and \code{feval} in \link{xgb.train}).
Note: in the column names of the final data.table, the dash '-' character is replaced with
the underscore '_' in order to make the column names more like regular R identifiers.

View File

@ -82,7 +82,6 @@ new.dtrain <- xgb.DMatrix(
new.dtest <- xgb.DMatrix(
data = new.features.test, label = agaricus.test$label, nthread = 2
)
watchlist <- list(train = new.dtrain)
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
# Model accuracy with new features

View File

@ -9,7 +9,7 @@ xgb.train(
params = list(),
data,
nrounds,
watchlist = list(),
evals = list(),
obj = NULL,
feval = NULL,
verbose = 1,
@ -158,13 +158,13 @@ List is provided in detail section.}
\item{nrounds}{max number of boosting iterations.}
\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
\item{evals}{Named list of \code{xgb.DMatrix} datasets to use for evaluating model performance.
Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
of these datasets during each boosting iteration, and stored in the end as a field named
\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
\code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
printed out during the training.
E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows tracking
the performance of each round's model on mat1 and mat2.}
\item{obj}{customized objective function. Returns gradient and second order
@ -234,7 +234,7 @@ The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
\details{
These are the training functions for \code{xgboost}.
The \code{xgb.train} interface supports advanced features such as \code{watchlist},
The \code{xgb.train} interface supports advanced features such as \code{evals},
customized objective and evaluation metric functions; it is therefore more flexible
than the \code{xgboost} interface.
@ -272,7 +272,7 @@ The following callbacks are automatically created when certain parameters are se
\itemize{
\item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
and the \code{print_every_n} parameter is passed to it.
\item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present.
\item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
\item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
\item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
}
@ -307,12 +307,12 @@ dtrain <- with(
dtest <- with(
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
)
watchlist <- list(train = dtrain, eval = dtest)
evals <- list(train = dtrain, eval = dtest)
## A simple xgb.train example:
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
## An xgb.train example where custom objective and evaluation metric are
## used:
@ -333,15 +333,15 @@ evalerror <- function(preds, dtrain) {
# as 'objective' and 'eval_metric' parameters in the params list:
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = logregobj, eval_metric = evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
# or through the ... arguments:
param <- list(max_depth = 2, eta = 1, nthread = nthread)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
objective = logregobj, eval_metric = evalerror)
# or as dedicated 'obj' and 'feval' parameters of xgb.train:
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
obj = logregobj, feval = evalerror)
@ -349,11 +349,11 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
my_etas <- list(eta = c(0.5, 0.1))
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_etas)))
## Early stopping:
bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
early_stopping_rounds = 3)
## An 'xgboost' interface example:

View File

@ -20,7 +20,7 @@ test_that("train and predict binary classification", {
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = nrounds,
objective = "binary:logistic", eval_metric = "error",
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
evals = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error"
)
@ -152,7 +152,7 @@ test_that("train and predict softprob", {
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softprob", num_class = 3, eval_metric = "merror",
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
),
"train-merror"
)
@ -203,7 +203,7 @@ test_that("train and predict softmax", {
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softmax", num_class = 3, eval_metric = "merror",
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
),
"train-merror"
)
@ -226,7 +226,7 @@ test_that("train and predict RF", {
nthread = n_threads,
nrounds = 1, objective = "binary:logistic", eval_metric = "error",
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1,
watchlist = list(train = xgb.DMatrix(train$data, label = lb))
evals = list(train = xgb.DMatrix(train$data, label = lb))
)
expect_equal(xgb.get.num.boosted.rounds(bst), 1)
@ -250,7 +250,7 @@ test_that("train and predict RF with softprob", {
objective = "multi:softprob", eval_metric = "merror",
num_class = 3, verbose = 0,
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5,
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
)
expect_equal(xgb.get.num.boosted.rounds(bst), 15)
# predict for all iterations:
@ -271,7 +271,7 @@ test_that("use of multiple eval metrics works", {
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss",
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
evals = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error.*train-auc.*train-logloss"
)
@ -283,7 +283,7 @@ test_that("use of multiple eval metrics works", {
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = list("error", "auc", "logloss"),
watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
evals = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error.*train-auc.*train-logloss"
)
@ -295,19 +295,19 @@ test_that("use of multiple eval metrics works", {
test_that("training continuation works", {
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
watchlist <- list(train = dtrain)
evals <- list(train = dtrain)
param <- list(
objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
)
# for the reference, use 4 iterations at once:
set.seed(11)
bst <- xgb.train(param, dtrain, nrounds = 4, watchlist, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 4, evals = evals, verbose = 0)
# first two iterations:
set.seed(11)
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
# continue for two more:
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = bst1)
if (!windows_flag && !solaris_flag) {
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
}
@ -315,7 +315,7 @@ test_that("training continuation works", {
expect_equal(dim(attributes(bst2)$evaluation_log), c(4, 2))
expect_equal(attributes(bst2)$evaluation_log, attributes(bst)$evaluation_log)
# test continuing from raw model data
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
if (!windows_flag && !solaris_flag) {
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
}
@ -323,7 +323,7 @@ test_that("training continuation works", {
# test continuing from a model in file
fname <- file.path(tempdir(), "xgboost.json")
xgb.save(bst1, fname)
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = fname)
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = fname)
if (!windows_flag && !solaris_flag) {
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
}
@ -417,7 +417,7 @@ test_that("max_delta_step works", {
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
watchlist <- list(train = dtrain)
evals <- list(train = dtrain)
param <- list(
objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
nthread = n_threads,
@ -425,9 +425,9 @@ test_that("max_delta_step works", {
)
nrounds <- 5
# model with no restriction on max_delta_step
bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
bst1 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1)
# model with restricted max_delta_step
bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1)
bst2 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1, max_delta_step = 1)
# the no-restriction model is expected to have consistently lower loss during the initial iterations
expect_true(all(attributes(bst1)$evaluation_log$train_logloss < attributes(bst2)$evaluation_log$train_logloss))
expect_lt(mean(attributes(bst1)$evaluation_log$train_logloss) / mean(attributes(bst2)$evaluation_log$train_logloss), 0.8)
@ -444,7 +444,7 @@ test_that("colsample_bytree works", {
colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
watchlist <- list(train = dtrain, eval = dtest)
evals <- list(train = dtrain, eval = dtest)
## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
## each tree
param <- list(
@ -453,7 +453,7 @@ test_that("colsample_bytree works", {
eval_metric = "auc"
)
set.seed(2)
bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 100, evals = evals, verbose = 0)
xgb.importance(model = bst)
# If colsample_bytree works properly, a variety of features should be used
# in the 100 trees

View File

@ -19,7 +19,7 @@ ltrain <- add.noise(train$label, 0.2)
ltest <- add.noise(test$label, 0.2)
dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
watchlist <- list(train = dtrain, test = dtest)
evals <- list(train = dtrain, test = dtest)
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
@ -39,7 +39,7 @@ test_that("xgb.cb.print.evaluation works as expected for xgb.train", {
nthread = n_threads
),
nrounds = 10,
watchlist = list(train = dtrain, test = dtest),
evals = list(train = dtrain, test = dtest),
callbacks = list(xgb.cb.print.evaluation(period = 1))
)
})
@ -57,7 +57,7 @@ test_that("xgb.cb.print.evaluation works as expected for xgb.train", {
nthread = n_threads
),
nrounds = 10,
watchlist = list(train = dtrain, test = dtest),
evals = list(train = dtrain, test = dtest),
callbacks = list(xgb.cb.print.evaluation(period = 2))
)
})
@ -117,7 +117,7 @@ test_that("xgb.cb.evaluation.log works as expected for xgb.train", {
),
nrounds = 10,
verbose = FALSE,
watchlist = list(train = dtrain, test = dtest),
evals = list(train = dtrain, test = dtest),
callbacks = list(xgb.cb.evaluation.log())
)
logs <- attributes(model)$evaluation_log
@ -155,7 +155,7 @@ param <- list(objective = "binary:logistic", eval_metric = "error",
test_that("can store evaluation_log without printing", {
expect_silent(
bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 10, evals = evals, eta = 1, verbose = 0)
)
expect_false(is.null(attributes(bst)$evaluation_log))
expect_false(is.null(attributes(bst)$evaluation_log$train_error))
@ -166,14 +166,14 @@ test_that("xgb.cb.reset.parameters works as expected", {
# fixed eta
set.seed(111)
bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9, verbose = 0)
bst0 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 0.9, verbose = 0)
expect_false(is.null(attributes(bst0)$evaluation_log))
expect_false(is.null(attributes(bst0)$evaluation_log$train_error))
# same eta but re-set as a vector parameter in the callback
set.seed(111)
my_par <- list(eta = c(0.9, 0.9))
bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bst1)$evaluation_log$train_error))
expect_equal(attributes(bst0)$evaluation_log$train_error,
@ -182,7 +182,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
# same eta but re-set via a function in the callback
set.seed(111)
my_par <- list(eta = function(itr, itr_end) 0.9)
bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bst2)$evaluation_log$train_error))
expect_equal(attributes(bst0)$evaluation_log$train_error,
@ -191,7 +191,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
# different eta re-set as a vector parameter in the callback
set.seed(111)
my_par <- list(eta = c(0.6, 0.5))
bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
bst3 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bst3)$evaluation_log$train_error))
expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error))
@ -199,7 +199,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
# resetting multiple parameters at the same time runs with no error
my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8))
expect_error(
bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
bst4 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_par)))
, NA) # NA = no error
# CV works as well
@ -210,7 +210,7 @@ test_that("xgb.cb.reset.parameters works as expected", {
# expect no learning with 0 learning rate
my_par <- list(eta = c(0., 0.))
bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
bstX <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bstX)$evaluation_log$train_error))
er <- unique(attributes(bstX)$evaluation_log$train_error)
@ -223,7 +223,7 @@ test_that("xgb.cb.save.model works as expected", {
files <- unname(sapply(files, function(f) file.path(tempdir(), f)))
for (f in files) if (file.exists(f)) file.remove(f)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
save_period = 1, save_name = file.path(tempdir(), "xgboost_%02d.json"))
expect_true(file.exists(files[1]))
expect_true(file.exists(files[2]))
@ -239,7 +239,7 @@ test_that("xgb.cb.save.model works as expected", {
expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))
# save_period = 0 saves the last iteration's model
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
save_period = 0, save_name = file.path(tempdir(), 'xgboost.json'))
expect_true(file.exists(files[3]))
b2 <- xgb.load(files[3])
@ -252,7 +252,7 @@ test_that("xgb.cb.save.model works as expected", {
test_that("early stopping xgb.train works", {
set.seed(11)
expect_output(
bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
bst <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
early_stopping_rounds = 3, maximize = FALSE)
, "Stopping. Best iteration")
expect_false(is.null(xgb.attr(bst, "best_iteration")))
@@ -266,7 +266,7 @@ test_that("early stopping xgb.train works", {
set.seed(11)
expect_silent(
bst0 <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
bst0 <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
early_stopping_rounds = 3, maximize = FALSE, verbose = 0)
)
expect_equal(attributes(bst)$evaluation_log, attributes(bst0)$evaluation_log)
@@ -282,7 +282,7 @@ test_that("early stopping xgb.train works", {
test_that("early stopping using a specific metric works", {
set.seed(11)
expect_output(
bst <- xgb.train(param[-2], dtrain, nrounds = 20, watchlist, eta = 0.6,
bst <- xgb.train(param[-2], dtrain, nrounds = 20, evals = evals, eta = 0.6,
eval_metric = "logloss", eval_metric = "auc",
callbacks = list(xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE,
metric_name = 'test_logloss')))
@@ -315,7 +315,7 @@ test_that("early stopping works with titanic", {
nrounds = 100,
early_stopping_rounds = 3,
nthread = n_threads,
watchlist = list(train = xgb.DMatrix(dtx, label = dty))
evals = list(train = xgb.DMatrix(dtx, label = dty))
)
expect_true(TRUE) # should not crash
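For reference, a hedged sketch of the simpler early-stopping interface these tests exercise, assuming the same data objects:

```r
# Stop if the evaluation metric fails to improve for 3 consecutive rounds.
bst <- xgb.train(param, dtrain, nrounds = 100, evals = evals,
                 early_stopping_rounds = 3, maximize = FALSE, verbose = 0)
# The best round is recorded as a booster attribute:
xgb.attr(bst, "best_iteration")
```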

View File

@@ -12,7 +12,7 @@ dtrain <- xgb.DMatrix(
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
@@ -33,7 +33,7 @@ param <- list(max_depth = 2, eta = 1, nthread = n_threads,
num_round <- 2
test_that("custom objective works", {
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)
expect_equal(class(bst), "xgb.Booster")
expect_false(is.null(attributes(bst)$evaluation_log))
expect_false(is.null(attributes(bst)$evaluation_log$eval_error))
@@ -48,7 +48,7 @@ test_that("custom objective in CV works", {
})
test_that("custom objective with early stop works", {
bst <- xgb.train(param, dtrain, 10, watchlist)
bst <- xgb.train(param, dtrain, 10, evals)
expect_equal(class(bst), "xgb.Booster")
train_log <- attributes(bst)$evaluation_log$train_error
expect_true(all(diff(train_log) <= 0))
@@ -66,7 +66,7 @@ test_that("custom objective using DMatrix attr works", {
return(list(grad = grad, hess = hess))
}
param$objective <- logregobjattr
bst <- xgb.train(param, dtrain, num_round, watchlist)
bst <- xgb.train(param, dtrain, num_round, evals)
expect_equal(class(bst), "xgb.Booster")
})
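For reference, a hedged sketch of the full custom-objective shape these tests rely on (the body is the standard logistic gradient and hessian; `dtrain` and `evals` are assumed defined as above):

```r
# A custom objective receives the raw predictions and the training DMatrix,
# and returns per-row gradients and hessians of the loss.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))  # sigmoid of the raw margin
  list(grad = preds - labels, hess = preds * (1 - preds))
}
param <- list(max_depth = 2, eta = 1, objective = logregobj)
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals)
```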

View File

@@ -41,13 +41,13 @@ test_that("xgb.DMatrix: basic construction", {
params <- list(tree_method = "hist", nthread = n_threads)
bst_fd <- xgb.train(
params, nrounds = 8, fd, watchlist = list(train = fd)
params, nrounds = 8, fd, evals = list(train = fd)
)
bst_dgr <- xgb.train(
params, nrounds = 8, fdgr, watchlist = list(train = fdgr)
params, nrounds = 8, fdgr, evals = list(train = fdgr)
)
bst_dgc <- xgb.train(
params, nrounds = 8, fdgc, watchlist = list(train = fdgc)
params, nrounds = 8, fdgc, evals = list(train = fdgc)
)
raw_fd <- xgb.save.raw(bst_fd, raw_format = "ubj")
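A hedged reconstruction of the three input types being compared (placeholder data; the object names mirror those in the test):

```r
library(Matrix)
x <- as.matrix(mtcars[, -1])
fd   <- xgb.DMatrix(x, label = mtcars$mpg)                       # dense matrix
fdgc <- xgb.DMatrix(as(x, "CsparseMatrix"), label = mtcars$mpg)  # dgCMatrix
fdgr <- xgb.DMatrix(as(as(x, "CsparseMatrix"), "RsparseMatrix"),
                    label = mtcars$mpg)                          # dgRMatrix
```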

View File

@@ -14,19 +14,19 @@ test_that("gblinear works", {
param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
watchlist <- list(eval = dtest, train = dtrain)
evals <- list(eval = dtest, train = dtrain)
n <- 5 # iterations
ERR_UL <- 0.005 # upper limit for the test set error
VERB <- 0 # chatterbox switch
param$updater <- 'shotgun'
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
ypred <- predict(bst, dtest)
expect_equal(length(getinfo(dtest, 'label')), 1611)
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic',
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic',
callbacks = list(xgb.cb.gblinear.history()))
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
h <- xgb.gblinear.history(bst)
@@ -34,16 +34,16 @@ test_that("gblinear works", {
expect_is(h, "matrix")
param$updater <- 'coord_descent'
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic')
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
bst <- xgb.train(param, dtrain, 2, watchlist, verbose = VERB, feature_selector = 'greedy')
bst <- xgb.train(param, dtrain, 2, evals, verbose = VERB, feature_selector = 'greedy')
expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL)
bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'thrifty',
bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'thrifty',
top_k = 50, callbacks = list(xgb.cb.gblinear.history(sparse = TRUE)))
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
h <- xgb.gblinear.history(bst)
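As a hedged usage sketch (same objects as in this test): the callback records the coefficients at every iteration, and `xgb.gblinear.history()` returns them for inspection or plotting.

```r
bst <- xgb.train(param, dtrain, n, evals, verbose = 0,
                 callbacks = list(xgb.cb.gblinear.history()))
h <- xgb.gblinear.history(bst)
matplot(h, type = "l")  # coefficient paths across boosting iterations
```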

View File

@@ -15,7 +15,7 @@ test_that('Test ranking with unweighted data', {
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
# Check if the metric is monotone increasing
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
@@ -39,7 +39,7 @@ test_that('Test ranking with weighted data', {
eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
)
bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
# Check if the metric is monotone increasing
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
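A hedged sketch of what these ranking tests presuppose: `rank:pairwise` needs query-group sizes attached to the DMatrix (the data below is made up):

```r
# Placeholder data: three queries of 10, 14 and 12 rows (36 rows in total).
x <- matrix(runif(36 * 4), nrow = 36)
dtrain <- xgb.DMatrix(x, label = sample(0:1, 36, replace = TRUE))
setinfo(dtrain, "group", c(10, 14, 12))  # rows per query, in order
params <- list(objective = "rank:pairwise", eta = 1, max_depth = 1,
               eval_metric = "auc")
bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
```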

View File

@@ -17,7 +17,7 @@ dtest <- xgb.DMatrix(
win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
test_that("updating the model works", {
watchlist <- list(train = dtrain, test = dtest)
evals <- list(train = dtrain, test = dtest)
# no-subsampling
p1 <- list(
@@ -25,19 +25,19 @@ test_that("updating the model works", {
updater = "grow_colmaker,prune"
)
set.seed(11)
bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0)
bst1 <- xgb.train(p1, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr1 <- xgb.model.dt.tree(model = bst1)
# with subsampling
p2 <- modifyList(p1, list(subsample = 0.1))
set.seed(11)
bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0)
bst2 <- xgb.train(p2, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr2 <- xgb.model.dt.tree(model = bst2)
# the same no-subsampling boosting with an extra 'refresh' updater:
p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
set.seed(11)
bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0)
bst1r <- xgb.train(p1r, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr1r <- xgb.model.dt.tree(model = bst1r)
# all should be the same when no subsampling
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1r)$evaluation_log)
@@ -53,7 +53,7 @@ test_that("updating the model works", {
# the same boosting with subsampling with an extra 'refresh' updater:
p2r <- modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
set.seed(11)
bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0)
bst2r <- xgb.train(p2r, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr2r <- xgb.model.dt.tree(model = bst2r)
# should be the same evaluation but different gains and larger cover
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2r)$evaluation_log)
@@ -66,7 +66,7 @@ test_that("updating the model works", {
# process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
set.seed(123)
p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE))
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
tr1u <- xgb.model.dt.tree(model = bst1u)
# all should be the same when no subsampling
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
@@ -79,7 +79,7 @@ test_that("updating the model works", {
# same thing but with a serialized model
set.seed(123)
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
tr1u <- xgb.model.dt.tree(model = bst1u)
# all should be the same when no subsampling
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
@@ -87,7 +87,7 @@ test_that("updating the model works", {
# process type 'update' for model with subsampling, refreshing only the tree stats from training data:
p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst2)
bst2u <- xgb.train(p2u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst2)
tr2u <- xgb.model.dt.tree(model = bst2u)
# should be the same evaluation but different gains and larger cover
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2u)$evaluation_log)
@@ -102,7 +102,7 @@ test_that("updating the model works", {
# process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:
p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
tr1ut <- xgb.model.dt.tree(model = bst1ut)
# should be the same evaluations but different gains and smaller cover (test data is smaller)
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1ut)$evaluation_log)
@@ -115,18 +115,18 @@ test_that("updating works for multiclass & multitree", {
dtr <- xgb.DMatrix(
as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
)
watchlist <- list(train = dtr)
evals <- list(train = dtr)
p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
base_score = 0)
set.seed(121)
bst0 <- xgb.train(p0, dtr, 5, watchlist, verbose = 0)
bst0 <- xgb.train(p0, dtr, 5, evals = evals, verbose = 0)
tr0 <- xgb.model.dt.tree(model = bst0)
# run update process for an original model with subsampling
p0u <- modifyList(p0, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
bst0u <- xgb.train(p0u, dtr, nrounds = xgb.get.num.boosted.rounds(bst0),
watchlist, xgb_model = bst0, verbose = 0)
evals = evals, xgb_model = bst0, verbose = 0)
tr0u <- xgb.model.dt.tree(model = bst0u)
# should be the same evaluation but different gains and larger cover
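To summarize the pattern exercised throughout this file, a hedged sketch of the refresh flow (objects as defined in the test above):

```r
# refresh_leaf = FALSE recomputes node statistics only (predictions unchanged);
# refresh_leaf = TRUE also recomputes leaf values from the supplied data.
# The round count must match the model being updated.
p_upd <- modifyList(p0, list(process_type = "update", updater = "refresh",
                             refresh_leaf = FALSE))
bst_upd <- xgb.train(p_upd, dtr, nrounds = xgb.get.num.boosted.rounds(bst0),
                     evals = evals, xgb_model = bst0, verbose = 0)
```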

View File

@@ -341,10 +341,10 @@ One way to measure progress in learning of a model is to provide to **XGBoost**
> in some way it is similar to what we have done above with the average error. The main difference is that below it was after building the model, and now it is during the construction that we measure errors.
For the purpose of this example, we use `watchlist` parameter. It is a list of `xgb.DMatrix`, each of them tagged with a name.
For the purpose of this example, we use the `evals` parameter. It is a list of `xgb.DMatrix` objects, each of them tagged with a name.
```{r watchlist, message=F, warning=F}
watchlist <- list(train = dtrain, test = dtest)
```{r evals, message=F, warning=F}
evals <- list(train = dtrain, test = dtest)
bst <- xgb.train(
data = dtrain
@@ -355,7 +355,7 @@ bst <- xgb.train(
, objective = "binary:logistic"
)
, nrounds = 2
, watchlist = watchlist
, evals = evals
)
```
@@ -367,7 +367,7 @@ If with your own dataset you have not such results, you should think about how y
For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.
```{r watchlist2, message=F, warning=F}
```{r evals2, message=F, warning=F}
bst <- xgb.train(
data = dtrain
, max_depth = 2
@@ -379,7 +379,7 @@ bst <- xgb.train(
, eval_metric = "logloss"
)
, nrounds = 2
, watchlist = watchlist
, evals = evals
)
```
@@ -401,7 +401,7 @@ bst <- xgb.train(
, eval_metric = "logloss"
)
, nrounds = 2
, watchlist = watchlist
, evals = evals
)
```
@@ -430,7 +430,7 @@ bst <- xgb.train(
, objective = "binary:logistic"
)
, nrounds = 2
, watchlist = watchlist
, evals = evals
)
```