[R] Rename watchlist -> evals (#10032)
This commit is contained in:
@@ -7,11 +7,11 @@
|
||||
xgb.Callback(
|
||||
cb_name = "custom_callback",
|
||||
env = new.env(),
|
||||
f_before_training = function(env, model, data, watchlist, begin_iteration,
|
||||
end_iteration) NULL,
|
||||
f_before_iter = function(env, model, data, watchlist, iteration) NULL,
|
||||
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL,
|
||||
f_after_training = function(env, model, data, watchlist, iteration, final_feval,
|
||||
f_before_training = function(env, model, data, evals, begin_iteration, end_iteration)
|
||||
NULL,
|
||||
f_before_iter = function(env, model, data, evals, iteration) NULL,
|
||||
f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
|
||||
f_after_training = function(env, model, data, evals, iteration, final_feval,
|
||||
prev_cb_res) NULL
|
||||
)
|
||||
}
|
||||
@@ -82,10 +82,10 @@ not be kept after the model fitting function terminates (see parameter \code{f_a
|
||||
For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
|
||||
\item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
|
||||
\item \code{bst}: The \code{xgb.Booster} object for the fold.
|
||||
\item \code{watchlist}: A list with two DMatrices, with names \code{train} and \code{test}
|
||||
\item \code{evals}: A list containing two DMatrices, with names \code{train} and \code{test}
|
||||
(\code{test} is the held-out data for the fold).
|
||||
\item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
|
||||
from which the \code{test} entry in the watchlist was obtained.
|
||||
from which the \code{test} entry in \code{evals} was obtained.
|
||||
}
|
||||
|
||||
This object should \bold{not} be in-place modified in ways that conflict with the
|
||||
@@ -104,7 +104,7 @@ For keeping variables across iterations, it's recommended to use \code{env} inst
|
||||
Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
|
||||
folds can be found in the \code{model} object.
|
||||
|
||||
\item watchlist The evaluation watchlist, as passed under argument \code{watchlist} to
|
||||
\item evals The evaluation data, as passed under argument \code{evals} to
|
||||
\link{xgb.train}.
|
||||
|
||||
For \link{xgb.cv}, this will always be \code{NULL}.
|
||||
@@ -127,15 +127,15 @@ example by using the early stopping callback \link{xgb.cb.early.stop}.
|
||||
\item iteration Index of the iteration number that is being executed (first iteration
|
||||
will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on).
|
||||
|
||||
\item iter_feval Evaluation metrics for the \code{watchlist} that was supplied, either
|
||||
\item iter_feval Evaluation metrics for \code{evals} that were supplied, either
|
||||
determined by the objective, or by parameter \code{feval}.
|
||||
|
||||
For \link{xgb.train}, this will be a named vector with one entry per element in
|
||||
\code{watchlist}, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
|
||||
example, if \code{watchlist} contains an entry named "tr" and the metric is "rmse",
|
||||
\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for
|
||||
example, if \code{evals} contains an entry named "tr" and the metric is "rmse",
|
||||
this will be a one-element vector with name "tr-rmse".
|
||||
|
||||
For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(watchlist), nfolds]},
|
||||
For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
|
||||
where the row names will follow the same naming logic as the one-dimensional vector
|
||||
that is passed in \link{xgb.train}.
|
||||
|
||||
@@ -187,18 +187,18 @@ the order in which the callbacks are passed to the model fitting function.
|
||||
}
|
||||
\examples{
|
||||
# Example constructing a custom callback that calculates
|
||||
# squared error on the training data, without a watchlist,
|
||||
# squared error on the training data (no separate test set),
|
||||
# and outputs the per-iteration results.
|
||||
ssq_callback <- xgb.Callback(
|
||||
cb_name = "ssq",
|
||||
f_before_training = function(env, model, data, watchlist,
|
||||
f_before_training = function(env, model, data, evals,
|
||||
begin_iteration, end_iteration) {
|
||||
# A vector to keep track of a number at each iteration
|
||||
env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
|
||||
},
|
||||
f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
|
||||
f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
|
||||
# This calculates the sum of squared errors on the training data.
|
||||
# Note that this can be better done by passing a 'watchlist' entry,
|
||||
# Note that this can be better done by passing an 'evals' entry,
|
||||
# but this demonstrates a way in which callbacks can be structured.
|
||||
pred <- predict(model, data)
|
||||
err <- pred - getinfo(data, "label")
|
||||
@@ -214,7 +214,7 @@ ssq_callback <- xgb.Callback(
|
||||
# A return value of 'TRUE' here would signal to finalize the training
|
||||
return(FALSE)
|
||||
},
|
||||
f_after_training = function(env, model, data, watchlist, iteration,
|
||||
f_after_training = function(env, model, data, evals, iteration,
|
||||
final_feval, prev_cb_res) {
|
||||
return(env$logs)
|
||||
}
|
||||
|
||||
@@ -20,9 +20,9 @@ the evaluation metric in order to stop the training.}
|
||||
|
||||
\item{metric_name}{The name of an evaluation column to use as a criteria for early
|
||||
stopping. If not set, the last column would be used.
|
||||
Let's say the test data in \code{watchlist} was labelled as \code{dtest},
|
||||
Let's say the test data in \code{evals} was labelled as \code{dtest},
|
||||
and one wants to use the AUC in test data for early stopping regardless of where
|
||||
it is in the \code{watchlist}, then one of the following would need to be set:
|
||||
it is in the \code{evals}, then one of the following would need to be set:
|
||||
\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
|
||||
All dash '-' characters in metric names are considered equivalent to '_'.}
|
||||
|
||||
@@ -51,5 +51,5 @@ condition occurred. Note that the \code{best_iteration} that is stored under R a
|
||||
base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
|
||||
through \link{xgb.attr} or \link{xgb.attributes}.
|
||||
|
||||
At least one data element is required in the evaluation watchlist for early stopping to work.
|
||||
At least one dataset is required in \code{evals} for early stopping to work.
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ Callback for logging the evaluation history
|
||||
}
|
||||
\details{
|
||||
This callback creates a table with per-iteration evaluation metrics (see parameters
|
||||
\code{watchlist} and \code{feval} in \link{xgb.train}).
|
||||
\code{evals} and \code{feval} in \link{xgb.train}).
|
||||
|
||||
Note: in the column names of the final data.table, the dash '-' character is replaced with
|
||||
the underscore '_' in order to make the column names more like regular R identifiers.
|
||||
|
||||
@@ -82,7 +82,6 @@ new.dtrain <- xgb.DMatrix(
|
||||
new.dtest <- xgb.DMatrix(
|
||||
data = new.features.test, label = agaricus.test$label, nthread = 2
|
||||
)
|
||||
watchlist <- list(train = new.dtrain)
|
||||
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
|
||||
|
||||
# Model accuracy with new features
|
||||
|
||||
@@ -9,7 +9,7 @@ xgb.train(
|
||||
params = list(),
|
||||
data,
|
||||
nrounds,
|
||||
watchlist = list(),
|
||||
evals = list(),
|
||||
obj = NULL,
|
||||
feval = NULL,
|
||||
verbose = 1,
|
||||
@@ -158,13 +158,13 @@ List is provided in detail section.}
|
||||
|
||||
\item{nrounds}{max number of boosting iterations.}
|
||||
|
||||
\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
|
||||
\item{evals}{Named list of \code{xgb.DMatrix} datasets to use for evaluating model performance.
|
||||
Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
|
||||
of these datasets during each boosting iteration, and stored in the end as a field named
|
||||
\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
|
||||
\code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
|
||||
printed out during the training.
|
||||
E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
|
||||
E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
|
||||
the performance of each round's model on mat1 and mat2.}
|
||||
|
||||
\item{obj}{customized objective function. Returns gradient and second order
|
||||
@@ -234,7 +234,7 @@ The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
|
||||
\details{
|
||||
These are the training functions for \code{xgboost}.
|
||||
|
||||
The \code{xgb.train} interface supports advanced features such as \code{watchlist},
|
||||
The \code{xgb.train} interface supports advanced features such as \code{evals},
|
||||
customized objective and evaluation metric functions, therefore it is more flexible
|
||||
than the \code{xgboost} interface.
|
||||
|
||||
@@ -272,7 +272,7 @@ The following callbacks are automatically created when certain parameters are se
|
||||
\itemize{
|
||||
\item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
|
||||
and the \code{print_every_n} parameter is passed to it.
|
||||
\item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present.
|
||||
\item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
|
||||
\item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
|
||||
\item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
|
||||
}
|
||||
@@ -307,12 +307,12 @@ dtrain <- with(
|
||||
dtest <- with(
|
||||
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
|
||||
)
|
||||
watchlist <- list(train = dtrain, eval = dtest)
|
||||
evals <- list(train = dtrain, eval = dtest)
|
||||
|
||||
## A simple xgb.train example:
|
||||
param <- list(max_depth = 2, eta = 1, nthread = nthread,
|
||||
objective = "binary:logistic", eval_metric = "auc")
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
|
||||
|
||||
## An xgb.train example where custom objective and evaluation metric are
|
||||
## used:
|
||||
@@ -333,15 +333,15 @@ evalerror <- function(preds, dtrain) {
|
||||
# as 'objective' and 'eval_metric' parameters in the params list:
|
||||
param <- list(max_depth = 2, eta = 1, nthread = nthread,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
|
||||
|
||||
# or through the ... arguments:
|
||||
param <- list(max_depth = 2, eta = 1, nthread = nthread)
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
|
||||
objective = logregobj, eval_metric = evalerror)
|
||||
|
||||
# or as dedicated 'obj' and 'feval' parameters of xgb.train:
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
|
||||
obj = logregobj, feval = evalerror)
|
||||
|
||||
|
||||
@@ -349,11 +349,11 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
|
||||
param <- list(max_depth = 2, eta = 1, nthread = nthread,
|
||||
objective = "binary:logistic", eval_metric = "auc")
|
||||
my_etas <- list(eta = c(0.5, 0.1))
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
|
||||
bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
|
||||
callbacks = list(xgb.cb.reset.parameters(my_etas)))
|
||||
|
||||
## Early stopping:
|
||||
bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
|
||||
bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
|
||||
early_stopping_rounds = 3)
|
||||
|
||||
## An 'xgboost' interface example:
|
||||
|
||||
Reference in New Issue
Block a user