[R] Rename watchlist -> evals (#10032)

This commit is contained in:
david-cortes
2024-03-09 23:48:06 +01:00
committed by GitHub
parent 2c13f90384
commit b023a253b4
28 changed files with 218 additions and 221 deletions

View File

@@ -7,11 +7,11 @@
xgb.Callback(
cb_name = "custom_callback",
env = new.env(),
f_before_training = function(env, model, data, watchlist, begin_iteration,
end_iteration) NULL,
f_before_iter = function(env, model, data, watchlist, iteration) NULL,
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL,
f_after_training = function(env, model, data, watchlist, iteration, final_feval,
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration)
NULL,
f_before_iter = function(env, model, data, evals, iteration) NULL,
f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
f_after_training = function(env, model, data, evals, iteration, final_feval,
prev_cb_res) NULL
)
}
@@ -82,10 +82,10 @@ not be kept after the model fitting function terminates (see parameter \code{f_a
For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
\item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
\item \code{bst}: Rhe \code{xgb.Booster} object for the fold.
\item \code{watchlist}: A list with two DMatrices, with names \code{train} and \code{test}
\item \code{evals}: A list containing two DMatrices, with names \code{train} and \code{test}
(\code{test} is the held-out data for the fold).
\item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
from which the \code{test} entry in the watchlist was obtained.
from which the \code{test} entry in \code{evals} was obtained.
}
This object should \bold{not} be in-place modified in ways that conflict with the
@@ -104,7 +104,7 @@ For keeping variables across iterations, it's recommended to use \code{env} inst
Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
folds can be found in the \code{model} object.
\item watchlist The evaluation watchlist, as passed under argument \code{watchlist} to
\item evals The evaluation data, as passed under argument \code{evals} to
\link{xgb.train}.
For \link{xgb.cv}, this will always be \code{NULL}.
@@ -127,15 +127,15 @@ example by using the early stopping callback \link{xgb.cb.early.stop}.
\item iteration Index of the iteration number that is being executed (first iteration
will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on).
\item iter_feval Evaluation metrics for the \code{watchlist} that was supplied, either
\item iter_feval Evaluation metrics for \code{evals} that were supplied, either
determined by the objective, or by parameter \code{feval}.
For \link{xgb.train}, this will be a named vector with one entry per element in
\code{watchlist}, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
example, if \code{watchlist} contains an entry named "tr" and the metric is "rmse",
\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for
example, if \code{evals} contains an entry named "tr" and the metric is "rmse",
this will be a one-element vector with name "tr-rmse".
For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(watchlist), nfolds]},
For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
where the row names will follow the same naming logic as the one-dimensional vector
that is passed in \link{xgb.train}.
@@ -187,18 +187,18 @@ the order in which the callbacks are passed to the model fitting function.
}
\examples{
# Example constructing a custom callback that calculates
# squared error on the training data, without a watchlist,
# squared error on the training data (no separate test set),
# and outputs the per-iteration results.
ssq_callback <- xgb.Callback(
cb_name = "ssq",
f_before_training = function(env, model, data, watchlist,
f_before_training = function(env, model, data, evals,
begin_iteration, end_iteration) {
# A vector to keep track of a number at each iteration
env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
},
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
# This calculates the sum of squared errors on the training data.
# Note that this can be better done by passing a 'watchlist' entry,
# Note that this can be better done by passing an 'evals' entry,
# but this demonstrates a way in which callbacks can be structured.
pred <- predict(model, data)
err <- pred - getinfo(data, "label")
@@ -214,7 +214,7 @@ ssq_callback <- xgb.Callback(
# A return value of 'TRUE' here would signal to finalize the training
return(FALSE)
},
f_after_training = function(env, model, data, watchlist, iteration,
f_after_training = function(env, model, data, evals, iteration,
final_feval, prev_cb_res) {
return(env$logs)
}

View File

@@ -20,9 +20,9 @@ the evaluation metric in order to stop the training.}
\item{metric_name}{The name of an evaluation column to use as a criteria for early
stopping. If not set, the last column would be used.
Let's say the test data in \code{watchlist} was labelled as \code{dtest},
Let's say the test data in \code{evals} was labelled as \code{dtest},
and one wants to use the AUC in test data for early stopping regardless of where
it is in the \code{watchlist}, then one of the following would need to be set:
it is in the \code{evals}, then one of the following would need to be set:
\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
All dash '-' characters in metric names are considered equivalent to '_'.}
@@ -51,5 +51,5 @@ condition occurred. Note that the \code{best_iteration} that is stored under R a
base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
through \link{xgb.attr} or \link{xgb.attributes}.
At least one data element is required in the evaluation watchlist for early stopping to work.
At least one dataset is required in \code{evals} for early stopping to work.
}

View File

@@ -14,7 +14,7 @@ Callback for logging the evaluation history
}
\details{
This callback creates a table with per-iteration evaluation metrics (see parameters
\code{watchlist} and \code{feval} in \link{xgb.train}).
\code{evals} and \code{feval} in \link{xgb.train}).
Note: in the column names of the final data.table, the dash '-' character is replaced with
the underscore '_' in order to make the column names more like regular R identifiers.

View File

@@ -82,7 +82,6 @@ new.dtrain <- xgb.DMatrix(
new.dtest <- xgb.DMatrix(
data = new.features.test, label = agaricus.test$label, nthread = 2
)
watchlist <- list(train = new.dtrain)
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
# Model accuracy with new features

View File

@@ -9,7 +9,7 @@ xgb.train(
params = list(),
data,
nrounds,
watchlist = list(),
evals = list(),
obj = NULL,
feval = NULL,
verbose = 1,
@@ -158,13 +158,13 @@ List is provided in detail section.}
\item{nrounds}{max number of boosting iterations.}
\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
\item{evals}{Named list of \code{xgb.DMatrix} datasets to use for evaluating model performance.
Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
of these datasets during each boosting iteration, and stored in the end as a field named
\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
\code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
printed out during the training.
E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
the performance of each round's model on mat1 and mat2.}
\item{obj}{customized objective function. Returns gradient and second order
@@ -234,7 +234,7 @@ The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
\details{
These are the training functions for \code{xgboost}.
The \code{xgb.train} interface supports advanced features such as \code{watchlist},
The \code{xgb.train} interface supports advanced features such as \code{evals},
customized objective and evaluation metric functions, therefore it is more flexible
than the \code{xgboost} interface.
@@ -272,7 +272,7 @@ The following callbacks are automatically created when certain parameters are se
\itemize{
\item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
and the \code{print_every_n} parameter is passed to it.
\item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present.
\item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
\item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
\item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
}
@@ -307,12 +307,12 @@ dtrain <- with(
dtest <- with(
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
)
watchlist <- list(train = dtrain, eval = dtest)
evals <- list(train = dtrain, eval = dtest)
## A simple xgb.train example:
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
## An xgb.train example where custom objective and evaluation metric are
## used:
@@ -333,15 +333,15 @@ evalerror <- function(preds, dtrain) {
# as 'objective' and 'eval_metric' parameters in the params list:
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = logregobj, eval_metric = evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
# or through the ... arguments:
param <- list(max_depth = 2, eta = 1, nthread = nthread)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
objective = logregobj, eval_metric = evalerror)
# or as dedicated 'obj' and 'feval' parameters of xgb.train:
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
obj = logregobj, feval = evalerror)
@@ -349,11 +349,11 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
my_etas <- list(eta = c(0.5, 0.1))
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
callbacks = list(xgb.cb.reset.parameters(my_etas)))
## Early stopping:
bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
early_stopping_rounds = 3)
## An 'xgboost' interface example: