[R] Rename watchlist -> evals (#10032)

2024-03-09 23:48:06 +01:00
parent 2c13f90384
commit b023a253b4
28 changed files with 218 additions and 221 deletions
--- a/R-package/man/xgb.Callback.Rd
+++ b/R-package/man/xgb.Callback.Rd
@@ -7,11 +7,11 @@
 xgb.Callback(
  cb_name = "custom_callback",
  env = new.env(),
-  f_before_training = function(env, model, data, watchlist, begin_iteration,
-    end_iteration) NULL,
-  f_before_iter = function(env, model, data, watchlist, iteration) NULL,
-  f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) NULL,
-  f_after_training = function(env, model, data, watchlist, iteration, final_feval,
+  f_before_training = function(env, model, data, evals, begin_iteration, end_iteration)
+    NULL,
+  f_before_iter = function(env, model, data, evals, iteration) NULL,
+  f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
+  f_after_training = function(env, model, data, evals, iteration, final_feval,
    prev_cb_res) NULL
 )
 }
@@ -82,10 +82,10 @@ not be kept after the model fitting function terminates (see parameter \code{f_a
 For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
 \item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
 \item \code{bst}: The \code{xgb.Booster} object for the fold.
-\item \code{watchlist}: A list with two DMatrices, with names \code{train} and \code{test}
+\item \code{evals}: A list containing two DMatrices, with names \code{train} and \code{test}
 (\code{test} is the held-out data for the fold).
 \item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
-from which the \code{test} entry in the watchlist was obtained.
+from which the \code{test} entry in \code{evals} was obtained.
 }

 This object should \bold{not} be in-place modified in ways that conflict with the
@@ -104,7 +104,7 @@ For keeping variables across iterations, it's recommended to use \code{env} inst
 Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
 folds can be found in the \code{model} object.

-\item watchlist The evaluation watchlist, as passed under argument \code{watchlist} to
+\item evals The evaluation data, as passed under argument \code{evals} to
 \link{xgb.train}.

 For \link{xgb.cv}, this will always be \code{NULL}.
@@ -127,15 +127,15 @@ example by using the early stopping callback \link{xgb.cb.early.stop}.
 \item iteration Index of the iteration number that is being executed (first iteration
 will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on).

-\item iter_feval Evaluation metrics for the \code{watchlist} that was supplied, either
+\item iter_feval Evaluation metrics for the datasets in \code{evals} that were supplied, either
 determined by the objective, or by parameter \code{feval}.

 For \link{xgb.train}, this will be a named vector with one entry per element in
-\code{watchlist}, where the names are determined as 'watchlist name' + '-' + 'metric name' - for
-example, if \code{watchlist} contains an entry named "tr" and the metric is "rmse",
+\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for
+example, if \code{evals} contains an entry named "tr" and the metric is "rmse",
 this will be a one-element vector with name "tr-rmse".

-For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(watchlist), nfolds]},
+For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
 where the row names will follow the same naming logic as the one-dimensional vector
 that is passed in \link{xgb.train}.

@@ -187,18 +187,18 @@ the order in which the callbacks are passed to the model fitting function.
 }
 \examples{
 # Example constructing a custom callback that calculates
-# squared error on the training data, without a watchlist,
+# squared error on the training data (no separate test set),
 # and outputs the per-iteration results.
 ssq_callback <- xgb.Callback(
  cb_name = "ssq",
-  f_before_training = function(env, model, data, watchlist,
+  f_before_training = function(env, model, data, evals,
                               begin_iteration, end_iteration) {
    # A vector to keep track of a number at each iteration
    env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
  },
-  f_after_iter = function(env, model, data, watchlist, iteration, iter_feval) {
+  f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
    # This calculates the sum of squared errors on the training data.
-    # Note that this can be better done by passing a 'watchlist' entry,
+    # Note that this can be better done by passing an 'evals' entry,
    # but this demonstrates a way in which callbacks can be structured.
    pred <- predict(model, data)
    err <- pred - getinfo(data, "label")
@@ -214,7 +214,7 @@ ssq_callback <- xgb.Callback(
    # A return value of 'TRUE' here would signal to finalize the training
    return(FALSE)
  },
-  f_after_training = function(env, model, data, watchlist, iteration,
+  f_after_training = function(env, model, data, evals, iteration,
                              final_feval, prev_cb_res) {
    return(env$logs)
  }
--- a/R-package/man/xgb.cb.early.stop.Rd
+++ b/R-package/man/xgb.cb.early.stop.Rd
@@ -20,9 +20,9 @@ the evaluation metric in order to stop the training.}

 \item{metric_name}{The name of an evaluation column to use as a criterion for early
 stopping. If not set, the last column would be used.
-Let's say the test data in \code{watchlist} was labelled as \code{dtest},
+Let's say the test data in \code{evals} was labelled as \code{dtest},
 and one wants to use the AUC in test data for early stopping regardless of where
-it is in the \code{watchlist}, then one of the following would need to be set:
+it is in \code{evals}, then one of the following would need to be set:
 \code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
 All dash '-' characters in metric names are considered equivalent to '_'.}

@@ -51,5 +51,5 @@ condition occurred. Note that the \code{best_iteration} that is stored under R a
 base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
 through \link{xgb.attr} or \link{xgb.attributes}.

-At least one data element is required in the evaluation watchlist for early stopping to work.
+At least one dataset is required in \code{evals} for early stopping to work.
 }
--- a/R-package/man/xgb.cb.evaluation.log.Rd
+++ b/R-package/man/xgb.cb.evaluation.log.Rd
@@ -14,7 +14,7 @@ Callback for logging the evaluation history
 }
 \details{
 This callback creates a table with per-iteration evaluation metrics (see parameters
-\code{watchlist} and \code{feval} in \link{xgb.train}).
+\code{evals} and \code{feval} in \link{xgb.train}).

 Note: in the column names of the final data.table, the dash '-' character is replaced with
 the underscore '_' in order to make the column names more like regular R identifiers.
--- a/R-package/man/xgb.create.features.Rd
+++ b/R-package/man/xgb.create.features.Rd
@@ -82,7 +82,6 @@ new.dtrain <- xgb.DMatrix(
 new.dtest <- xgb.DMatrix(
  data = new.features.test, label = agaricus.test$label, nthread = 2
 )
-watchlist <- list(train = new.dtrain)
 bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)

 # Model accuracy with new features
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -9,7 +9,7 @@ xgb.train(
  params = list(),
  data,
  nrounds,
-  watchlist = list(),
+  evals = list(),
  obj = NULL,
  feval = NULL,
  verbose = 1,
@@ -158,13 +158,13 @@ List is provided in detail section.}

 \item{nrounds}{max number of boosting iterations.}

-\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
+\item{evals}{Named list of \code{xgb.DMatrix} datasets to use for evaluating model performance.
 Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
 of these datasets during each boosting iteration, and stored in the end as a field named
 \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
 \code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
 printed out during the training.
-E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
+E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows tracking
 the performance of each round's model on mat1 and mat2.}

 \item{obj}{customized objective function. Returns gradient and second order
@@ -234,7 +234,7 @@ The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
 \details{
 These are the training functions for \code{xgboost}.

-The \code{xgb.train} interface supports advanced features such as \code{watchlist},
+The \code{xgb.train} interface supports advanced features such as \code{evals},
 customized objective and evaluation metric functions, therefore it is more flexible
 than the \code{xgboost} interface.

@@ -272,7 +272,7 @@ The following callbacks are automatically created when certain parameters are se
 \itemize{
 \item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
 and the \code{print_every_n} parameter is passed to it.
-\item \code{xgb.cb.evaluation.log} is on when \code{watchlist} is present.
+\item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
 \item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
 \item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
 }
@@ -307,12 +307,12 @@ dtrain <- with(
 dtest <- with(
  agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
 )
-watchlist <- list(train = dtrain, eval = dtest)
+evals <- list(train = dtrain, eval = dtest)

 ## A simple xgb.train example:
 param <- list(max_depth = 2, eta = 1, nthread = nthread,
              objective = "binary:logistic", eval_metric = "auc")
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)

 ## An xgb.train example where custom objective and evaluation metric are
 ## used:
@@ -333,15 +333,15 @@ evalerror <- function(preds, dtrain) {
 #  as 'objective' and 'eval_metric' parameters in the params list:
 param <- list(max_depth = 2, eta = 1, nthread = nthread,
              objective = logregobj, eval_metric = evalerror)
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)

 #  or through the ... arguments:
 param <- list(max_depth = 2, eta = 1, nthread = nthread)
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
                 objective = logregobj, eval_metric = evalerror)

 #  or as dedicated 'obj' and 'feval' parameters of xgb.train:
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
                 obj = logregobj, feval = evalerror)


@@ -349,11 +349,11 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
 param <- list(max_depth = 2, eta = 1, nthread = nthread,
              objective = "binary:logistic", eval_metric = "auc")
 my_etas <- list(eta = c(0.5, 0.1))
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
                 callbacks = list(xgb.cb.reset.parameters(my_etas)))

 ## Early stopping:
-bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
+bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
                 early_stopping_rounds = 3)

 ## An 'xgboost' interface example: