Deprecate reg:linear' in favor of reg:squarederror'. (#4267)

* Deprecate `reg:linear' in favor of `reg:squarederror'.
* Replace the use of `reg:linear'.
* Replace the use of `silent`.
This commit is contained in:
Jiaming Yuan
2019-03-17 17:55:04 +08:00
committed by GitHub
parent cf8d5b9b76
commit 29a1356669
34 changed files with 210 additions and 193 deletions

View File

@@ -28,12 +28,12 @@ NVL <- function(x, val) {
# Merges booster params with whatever is provided in ...
# plus runs some checks
check.booster.params <- function(params, ...) {
if (typeof(params) != "list")
if (typeof(params) != "list")
stop("params must be a list")
# in R interface, allow for '.' instead of '_' in parameter names
names(params) <- gsub("\\.", "_", names(params))
# merge parameters from the params and the dots-expansion
dot_params <- list(...)
names(dot_params) <- gsub("\\.", "_", names(dot_params))
@@ -41,15 +41,15 @@ check.booster.params <- function(params, ...) {
names(dot_params))) > 0)
stop("Same parameters in 'params' and in the call are not allowed. Please check your 'params' list.")
params <- c(params, dot_params)
# providing a parameter multiple times makes sense only for 'eval_metric'
name_freqs <- table(names(params))
multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric')
if (length(multi_names) > 0) {
warning("The following parameters were provided multiple times:\n\t",
paste(multi_names, collapse = ', '), "\n Only the last value for each of them will be used.\n")
# While xgboost internals would choose the last value for a multiple-times parameter,
# enforce it here in R as well (b/c multi-parameters might be used further in R code,
# While xgboost internals would choose the last value for a multiple-times parameter,
# enforce it here in R as well (b/c multi-parameters might be used further in R code,
# and R takes the 1st value when multiple elements with the same name are present in a list).
for (n in multi_names) {
del_idx <- which(n == names(params))
@@ -57,25 +57,25 @@ check.booster.params <- function(params, ...) {
params[[del_idx]] <- NULL
}
}
# for multiclass, expect num_class to be set
if (typeof(params[['objective']]) == "character" &&
substr(NVL(params[['objective']], 'x'), 1, 6) == 'multi:' &&
as.numeric(NVL(params[['num_class']], 0)) < 2) {
stop("'num_class' > 1 parameter must be set for multiclass classification")
}
# monotone_constraints parser
if (!is.null(params[['monotone_constraints']]) &&
typeof(params[['monotone_constraints']]) != "character") {
vec2str = paste(params[['monotone_constraints']], collapse = ',')
vec2str = paste0('(', vec2str, ')')
params[['monotone_constraints']] = vec2str
}
# interaction constraints parser (convert from list of column indices to string)
if (!is.null(params[['interaction_constraints']]) &&
if (!is.null(params[['interaction_constraints']]) &&
typeof(params[['interaction_constraints']]) != "character"){
# check input class
if (class(params[['interaction_constraints']]) != 'list') stop('interaction_constraints should be class list')
@@ -96,10 +96,10 @@ check.booster.params <- function(params, ...) {
check.custom.obj <- function(env = parent.frame()) {
if (!is.null(env$params[['objective']]) && !is.null(env$obj))
stop("Setting objectives in 'params' and 'obj' at the same time is not allowed")
if (!is.null(env$obj) && typeof(env$obj) != 'closure')
stop("'obj' must be a function")
# handle the case when custom objective function was provided through params
if (!is.null(env$params[['objective']]) &&
typeof(env$params$objective) == 'closure') {
@@ -113,21 +113,21 @@ check.custom.obj <- function(env = parent.frame()) {
check.custom.eval <- function(env = parent.frame()) {
if (!is.null(env$params[['eval_metric']]) && !is.null(env$feval))
stop("Setting evaluation metrics in 'params' and 'feval' at the same time is not allowed")
if (!is.null(env$feval) && typeof(env$feval) != 'closure')
stop("'feval' must be a function")
# handle a situation when custom eval function was provided through params
if (!is.null(env$params[['eval_metric']]) &&
typeof(env$params$eval_metric) == 'closure') {
env$feval <- env$params$eval_metric
env$params$eval_metric <- NULL
}
# require maximize to be set when custom feval and early stopping are used together
if (!is.null(env$feval) &&
is.null(env$maximize) && (
!is.null(env$early_stopping_rounds) ||
!is.null(env$early_stopping_rounds) ||
has.callbacks(env$callbacks, 'cb.early.stop')))
stop("Please set 'maximize' to indicate whether the evaluation metric needs to be maximized or not")
}
@@ -154,15 +154,15 @@ xgb.iter.update <- function(booster_handle, dtrain, iter, obj = NULL) {
# Evaluate one iteration.
# Returns a named vector of evaluation metrics
# Returns a named vector of evaluation metrics
# with the names in a 'datasetname-metricname' format.
xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {
if (!identical(class(booster_handle), "xgb.Booster.handle"))
stop("class of booster_handle must be xgb.Booster.handle")
if (length(watchlist) == 0)
if (length(watchlist) == 0)
return(NULL)
evnames <- names(watchlist)
if (is.null(feval)) {
msg <- .Call(XGBoosterEvalOneIter_R, booster_handle, as.integer(iter), watchlist, as.list(evnames))
@@ -189,7 +189,7 @@ xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {
# Generates random (stratified if needed) CV folds
generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
# cannot do it for rank
if (exists('objective', where = params) &&
is.character(params$objective) &&
@@ -209,13 +209,14 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
if (exists('objective', where = params) &&
is.character(params$objective)) {
# If 'objective' provided in params, assume that y is a classification label
# unless objective is reg:linear
if (params$objective != 'reg:linear')
# unless objective is reg:squarederror
if (params$objective != 'reg:squarederror')
y <- factor(y)
} else {
# If no 'objective' given in params, it means that user either wants to use
# the default 'reg:linear' objective or has provided a custom obj function.
# Here, assume classification setting when y has 5 or less unique values:
# If no 'objective' given in params, it means that user either wants to
# use the default 'reg:squarederror' objective or has provided a custom
# obj function. Here, assume classification setting when y has 5 or less
# unique values:
if (length(unique(y)) <= 5)
y <- factor(y)
}
@@ -293,22 +294,22 @@ xgb.createFolds <- function(y, k = 10)
#
#' Deprecation notices.
#'
#'
#' At this time, some of the parameter names were changed in order to make the code style more uniform.
#' The deprecated parameters would be removed in the next release.
#'
#'
#' To see all the current deprecated and new parameters, check the \code{xgboost:::depr_par_lut} table.
#'
#' A deprecation warning is shown when any of the deprecated parameters is used in a call.
#' An additional warning is shown when there was a partial match to a deprecated parameter
#'
#' A deprecation warning is shown when any of the deprecated parameters is used in a call.
#' An additional warning is shown when there was a partial match to a deprecated parameter
#' (as R is able to partially match parameter names).
#'
#'
#' @name xgboost-deprecated
NULL
# Lookup table for the deprecated parameters bookkeeping
depr_par_lut <- matrix(c(
'print.every.n', 'print_every_n',
'print.every.n', 'print_every_n',
'early.stop.round', 'early_stopping_rounds',
'training.data', 'data',
'with.stats', 'with_stats',

View File

@@ -1,12 +1,12 @@
#' Cross Validation
#'
#'
#' The cross validation function of xgboost
#'
#'
#' @param params the list of parameters. Commonly used ones are:
#' \itemize{
#' \item \code{objective} objective function, common ones are
#' \itemize{
#' \item \code{reg:linear} linear regression
#' \item \code{reg:squarederror} Regression with squared loss
#' \item \code{binary:logistic} logistic regression for classification
#' }
#' \item \code{eta} step size of each boosting step
@@ -18,12 +18,12 @@
#' See also demo/ for walkthrough example in R.
#' @param data takes an \code{xgb.DMatrix}, \code{matrix}, or \code{dgCMatrix} as the input.
#' @param nrounds the max number of iterations
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label vector of response values. Should be provided only when data is an R-matrix.
#' @param missing is only used when input is a dense matrix. By default is set to NA, which means
#' that NA values should be considered as 'missing' by the algorithm.
#' @param missing is only used when input is a dense matrix. By default is set to NA, which means
#' that NA values should be considered as 'missing' by the algorithm.
#' Sometimes, 0 or other extreme value might be used to represent missing values.
#' @param prediction A logical value indicating whether to return the test fold predictions
#' @param prediction A logical value indicating whether to return the test fold predictions
#' from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param metrics, list of evaluation metrics to be used in cross validation,
@@ -37,22 +37,22 @@
#' \item \code{aucpr} Area under PR curve
#' \item \code{merror} Exact matching error, used to evaluate multi-class classification
#' }
#' @param obj customized objective function. Returns gradient and second order
#' @param obj customized objective function. Returns gradient and second order
#' gradient with given prediction and dtrain.
#' @param feval custimized evaluation function. Returns
#' \code{list(metric='metric-name', value='metric-value')} with given
#' @param feval custimized evaluation function. Returns
#' \code{list(metric='metric-name', value='metric-value')} with given
#' prediction and dtrain.
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
#' by the values of outcome labels.
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param verbose \code{boolean}, print the statistics during the process
#' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
#' Default is 1 which means all messages are printed. This parameter is passed to the
#' Default is 1 which means all messages are printed. This parameter is passed to the
#' \code{\link{cb.print.evaluation}} callback.
#' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
#' If set to an integer \code{k}, training with a validation set will stop if the performance
#' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
#' If set to an integer \code{k}, training with a validation set will stop if the performance
#' doesn't improve for \code{k} rounds.
#' Setting this parameter engages the \code{\link{cb.early.stop}} callback.
#' @param maximize If \code{feval} and \code{early_stopping_rounds} are set,
@@ -60,46 +60,46 @@
#' When it is \code{TRUE}, it means the larger the evaluation score the better.
#' This parameter is passed to the \code{\link{cb.early.stop}} callback.
#' @param callbacks a list of callback functions to perform various task during boosting.
#' See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
#' parameters' values. User can provide either existing or their own callback methods in order
#' See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
#' parameters' values. User can provide either existing or their own callback methods in order
#' to customize the training process.
#' @param ... other parameters to pass to \code{params}.
#'
#' @details
#' The original sample is randomly partitioned into \code{nfold} equal size subsamples.
#'
#' Of the \code{nfold} subsamples, a single subsample is retained as the validation data for testing the model, and the remaining \code{nfold - 1} subsamples are used as training data.
#'
#'
#' @details
#' The original sample is randomly partitioned into \code{nfold} equal size subsamples.
#'
#' Of the \code{nfold} subsamples, a single subsample is retained as the validation data for testing the model, and the remaining \code{nfold - 1} subsamples are used as training data.
#'
#' The cross-validation process is then repeated \code{nrounds} times, with each of the \code{nfold} subsamples used exactly once as the validation data.
#'
#'
#' All observations are used for both training and validation.
#'
#'
#' Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation}
#'
#' @return
#' @return
#' An object of class \code{xgb.cv.synchronous} with the following elements:
#' \itemize{
#' \item \code{call} a function call.
#' \item \code{params} parameters that were passed to the xgboost library. Note that it does not
#' \item \code{params} parameters that were passed to the xgboost library. Note that it does not
#' capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
#' \item \code{callbacks} callback functions that were either automatically assigned or
#' \item \code{callbacks} callback functions that were either automatically assigned or
#' explicitly passed.
#' \item \code{evaluation_log} evaluation history storead as a \code{data.table} with the
#' first column corresponding to iteration number and the rest corresponding to the
#' first column corresponding to iteration number and the rest corresponding to the
#' CV-based evaluation means and standard deviations for the training and test CV-sets.
#' It is created by the \code{\link{cb.evaluation.log}} callback.
#' \item \code{niter} number of boosting iterations.
#' \item \code{nfeatures} number of features in training data.
#' \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
#' \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
#' parameter or randomly generated.
#' \item \code{best_iteration} iteration number with the best evaluation metric value
#' (only available with early stopping).
#' \item \code{best_ntreelimit} the \code{ntreelimit} value corresponding to the best iteration,
#' \item \code{best_ntreelimit} the \code{ntreelimit} value corresponding to the best iteration,
#' which could further be used in \code{predict} method
#' (only available with early stopping).
#' \item \code{pred} CV prediction values available when \code{prediction} is set.
#' \item \code{pred} CV prediction values available when \code{prediction} is set.
#' It is either vector or matrix (see \code{\link{cb.cv.predict}}).
#' \item \code{models} a liost of the CV folds' models. It is only available with the explicit
#' \item \code{models} a liost of the CV folds' models. It is only available with the explicit
#' setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
#' }
#'
@@ -110,32 +110,32 @@
#' max_depth = 3, eta = 1, objective = "binary:logistic")
#' print(cv)
#' print(cv, verbose=TRUE)
#'
#'
#' @export
xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NA,
prediction = FALSE, showsd = TRUE, metrics=list(),
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL,
obj = NULL, feval = NULL, stratified = TRUE, folds = NULL,
verbose = TRUE, print_every_n=1L,
early_stopping_rounds = NULL, maximize = NULL, callbacks = list(), ...) {
check.deprecation(...)
params <- check.booster.params(params, ...)
# TODO: should we deprecate the redundant 'metrics' parameter?
for (m in metrics)
params <- c(params, list("eval_metric" = m))
check.custom.obj()
check.custom.eval()
#if (is.null(params[['eval_metric']]) && is.null(feval))
# stop("Either 'eval_metric' or 'feval' must be provided for CV")
# Check the labels
if ( (inherits(data, 'xgb.DMatrix') && is.null(getinfo(data, 'label'))) ||
(!inherits(data, 'xgb.DMatrix') && is.null(label)))
stop("Labels must be provided for CV either through xgb.DMatrix, or through 'label=' when 'data' is matrix")
# CV folds
if(!is.null(folds)) {
if(!is.list(folds) || length(folds) < 2)
@@ -146,7 +146,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
stop("'nfold' must be > 1")
folds <- generate.cv.folds(nfold, nrow(data), stratified, label, params)
}
# Potential TODO: sequential CV
#if (strategy == 'sequential')
# stop('Sequential CV strategy is not yet implemented')
@@ -166,7 +166,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
stop_condition <- FALSE
if (!is.null(early_stopping_rounds) &&
!has.callbacks(callbacks, 'cb.early.stop')) {
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
maximize = maximize, verbose = verbose))
}
# CV-predictions callback
@@ -177,7 +177,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
# Sort the callbacks into categories
cb <- categorize.callbacks(callbacks)
# create the booster-folds
dall <- xgb.get.DMatrix(data, label, missing)
bst_folds <- lapply(seq_along(folds), function(k) {
@@ -197,12 +197,12 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
# those are fixed for CV (no training continuation)
begin_iteration <- 1
end_iteration <- nrounds
# synchronous CV boosting: run CV folds' models within each iteration
for (iteration in begin_iteration:end_iteration) {
for (f in cb$pre_iter) f()
msg <- lapply(bst_folds, function(fd) {
xgb.iter.update(fd$bst, fd$dtrain, iteration - 1, obj)
xgb.iter.eval(fd$bst, fd$watchlist, iteration - 1, feval)
@@ -210,9 +210,9 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
msg <- simplify2array(msg)
bst_evaluation <- rowMeans(msg)
bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2)
for (f in cb$post_iter) f()
if (stop_condition) break
}
for (f in cb$finalize) f(finalize = TRUE)
@@ -236,17 +236,17 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
#' Print xgb.cv result
#'
#'
#' Prints formatted results of \code{xgb.cv}.
#'
#'
#' @param x an \code{xgb.cv.synchronous} object
#' @param verbose whether to print detailed data
#' @param ... passed to \code{data.table.print}
#'
#'
#' @details
#' When not verbose, it would only print the evaluation results,
#' When not verbose, it would only print the evaluation results,
#' including the best iteration (when available).
#'
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
@@ -254,13 +254,13 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' print(cv)
#' print(cv, verbose=TRUE)
#'
#'
#' @rdname print.xgb.cv
#' @method print xgb.cv.synchronous
#' @export
print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
cat('##### xgb.cv ', length(x$folds), '-folds\n', sep = '')
if (verbose) {
if (!is.null(x$call)) {
cat('call:\n ')
@@ -268,8 +268,8 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
}
if (!is.null(x$params)) {
cat('params (as set within xgb.cv):\n')
cat( ' ',
paste(names(x$params),
cat( ' ',
paste(names(x$params),
paste0('"', unlist(x$params), '"'),
sep = ' = ', collapse = ', '), '\n', sep = '')
}
@@ -280,9 +280,9 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
print(x)
})
}
for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
if (is.null(x[[n]]))
if (is.null(x[[n]]))
next
cat(n, ': ', x[[n]], '\n', sep = '')
}
@@ -293,10 +293,10 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
}
}
if (verbose)
if (verbose)
cat('evaluation_log:\n')
print(x$evaluation_log, row.names = FALSE, ...)
if (!is.null(x$best_iteration)) {
cat('Best iteration:\n')
print(x$evaluation_log[x$best_iteration], row.names = FALSE, ...)

View File

@@ -42,7 +42,7 @@
#' \itemize{
#' \item \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. The default objective options are below:
#' \itemize{
#' \item \code{reg:linear} linear regression (Default).
#' \item \code{reg:squarederror} Regression with squared loss (Default).
#' \item \code{reg:logistic} logistic regression.
#' \item \code{binary:logistic} logistic regression for binary classification. Output probability.
#' \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.

View File

@@ -16,7 +16,7 @@ xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
\itemize{
\item \code{objective} objective function, common ones are
\itemize{
\item \code{reg:linear} linear regression
\item \code{reg:squarederror} Regression with squared loss.
\item \code{binary:logistic} logistic regression for classification
}
\item \code{eta} step size of each boosting step

View File

@@ -56,7 +56,7 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
\itemize{
\item \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. The default objective options are below:
\itemize{
\item \code{reg:linear} linear regression (Default).
\item \code{reg:squarederror} Regression with squared loss (Default).
\item \code{reg:logistic} logistic regression.
\item \code{binary:logistic} logistic regression for binary classification. Output probability.
\item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
@@ -210,7 +210,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
watchlist <- list(train = dtrain, eval = dtest)
## A simple xgb.train example:
param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
objective = "binary:logistic", eval_metric = "auc")
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
@@ -231,12 +231,12 @@ evalerror <- function(preds, dtrain) {
# These functions could be used by passing them either:
# as 'objective' and 'eval_metric' parameters in the params list:
param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
objective = logregobj, eval_metric = evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
# or through the ... arguments:
param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2)
param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
objective = logregobj, eval_metric = evalerror)
@@ -246,7 +246,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
## An xgb.train example of using variable learning rates at each iteration:
param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
objective = "binary:logistic", eval_metric = "auc")
my_etas <- list(eta = c(0.5, 0.1))
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,

View File

@@ -10,12 +10,12 @@ test_label <- agaricus.test$label[1:100]
test_that("xgb.DMatrix: basic construction", {
# from sparse matrix
dtest1 <- xgb.DMatrix(test_data, label=test_label)
# from dense matrix
# from dense matrix
dtest2 <- xgb.DMatrix(as.matrix(test_data), label=test_label)
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label'))
expect_equal(dim(dtest1), dim(dtest2))
#from dense integer matrix
int_data <- as.matrix(test_data)
storage.mode(int_data) <- "integer"
@@ -33,7 +33,7 @@ test_that("xgb.DMatrix: saving, loading", {
expect_output(dtest3 <- xgb.DMatrix(tmp_file, silent = TRUE), NA)
unlink(tmp_file)
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label'))
# from a libsvm text file
tmp <- c("0 1:1 2:1","1 3:1","0 1:1")
tmp_file <- 'tmp.libsvm'
@@ -49,7 +49,7 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
expect_true(setinfo(dtest, 'label', test_label))
labels <- getinfo(dtest, 'label')
expect_equal(test_label, getinfo(dtest, 'label'))
expect_true(length(getinfo(dtest, 'weight')) == 0)
expect_true(length(getinfo(dtest, 'base_margin')) == 0)
@@ -57,10 +57,10 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
expect_true(setinfo(dtest, 'base_margin', test_label))
expect_true(setinfo(dtest, 'group', c(50,50)))
expect_error(setinfo(dtest, 'group', test_label))
# providing character values will give a warning
expect_warning( setinfo(dtest, 'weight', rep('a', nrow(test_data))) )
# any other label should error
expect_error(setinfo(dtest, 'asdf', test_label))
})
@@ -71,7 +71,7 @@ test_that("xgb.DMatrix: slice, dim", {
dsub1 <- slice(dtest, 1:42)
expect_equal(nrow(dsub1), 42)
expect_equal(ncol(dsub1), ncol(test_data))
dsub2 <- dtest[1:42,]
expect_equal(dim(dtest), dim(test_data))
expect_equal(getinfo(dsub1, 'label'), getinfo(dsub2, 'label'))

View File

@@ -98,7 +98,7 @@ test_that("SHAP contribution values are not NAN", {
fit <- xgboost(
verbose = 0,
params = list(
objective = "reg:linear",
objective = "reg:squarederror",
eval_metric = "rmse"),
data = as.matrix(subset(d, fold == 2)[, ivs]),
label = subset(d, fold == 2)$y,