Deprecate `reg:linear` in favor of `reg:squarederror`. (#4267)

* Deprecate `reg:linear` in favor of `reg:squarederror`.
* Replace the use of `reg:linear`.
* Replace the use of `silent`.
Jiaming Yuan 2019-03-17 17:55:04 +08:00 committed by GitHub
parent cf8d5b9b76
commit 29a1356669
34 changed files with 210 additions and 193 deletions
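A minimal sketch of the parameter migration in the Python API (toy data and hyper-parameters are illustrative only):

```python
import numpy as np
import xgboost as xgb

# Toy regression data, purely for illustration.
X = np.random.rand(100, 5)
y = np.random.rand(100)
dtrain = xgb.DMatrix(X, label=y)

# Before this commit:
#   param = {'objective': 'reg:linear', 'silent': 1, 'max_depth': 2, 'eta': 1}
# Afterwards, 'reg:squarederror' is the canonical objective name and
# 'verbosity' replaces the deprecated 'silent' flag.
param = {'objective': 'reg:squarederror', 'verbosity': 0, 'max_depth': 2, 'eta': 1}
bst = xgb.train(param, dtrain, num_boost_round=2)
```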


@@ -28,12 +28,12 @@ NVL <- function(x, val) {
# Merges booster params with whatever is provided in ...
# plus runs some checks
check.booster.params <- function(params, ...) {
  if (typeof(params) != "list")
    stop("params must be a list")
  # in R interface, allow for '.' instead of '_' in parameter names
  names(params) <- gsub("\\.", "_", names(params))
  # merge parameters from the params and the dots-expansion
  dot_params <- list(...)
  names(dot_params) <- gsub("\\.", "_", names(dot_params))
@@ -41,15 +41,15 @@ check.booster.params <- function(params, ...) {
                        names(dot_params))) > 0)
    stop("Same parameters in 'params' and in the call are not allowed. Please check your 'params' list.")
  params <- c(params, dot_params)
  # providing a parameter multiple times makes sense only for 'eval_metric'
  name_freqs <- table(names(params))
  multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric')
  if (length(multi_names) > 0) {
    warning("The following parameters were provided multiple times:\n\t",
            paste(multi_names, collapse = ', '), "\n Only the last value for each of them will be used.\n")
    # While xgboost internals would choose the last value for a multiple-times parameter,
    # enforce it here in R as well (b/c multi-parameters might be used further in R code,
    # and R takes the 1st value when multiple elements with the same name are present in a list).
    for (n in multi_names) {
      del_idx <- which(n == names(params))
@@ -57,25 +57,25 @@ check.booster.params <- function(params, ...) {
      params[[del_idx]] <- NULL
    }
  }
  # for multiclass, expect num_class to be set
  if (typeof(params[['objective']]) == "character" &&
      substr(NVL(params[['objective']], 'x'), 1, 6) == 'multi:' &&
      as.numeric(NVL(params[['num_class']], 0)) < 2) {
    stop("'num_class' > 1 parameter must be set for multiclass classification")
  }
  # monotone_constraints parser
  if (!is.null(params[['monotone_constraints']]) &&
      typeof(params[['monotone_constraints']]) != "character") {
    vec2str = paste(params[['monotone_constraints']], collapse = ',')
    vec2str = paste0('(', vec2str, ')')
    params[['monotone_constraints']] = vec2str
  }
  # interaction constraints parser (convert from list of column indices to string)
  if (!is.null(params[['interaction_constraints']]) &&
      typeof(params[['interaction_constraints']]) != "character"){
    # check input class
    if (class(params[['interaction_constraints']]) != 'list') stop('interaction_constraints should be class list')
@@ -96,10 +96,10 @@ check.booster.params <- function(params, ...) {
check.custom.obj <- function(env = parent.frame()) {
  if (!is.null(env$params[['objective']]) && !is.null(env$obj))
    stop("Setting objectives in 'params' and 'obj' at the same time is not allowed")
  if (!is.null(env$obj) && typeof(env$obj) != 'closure')
    stop("'obj' must be a function")
  # handle the case when custom objective function was provided through params
  if (!is.null(env$params[['objective']]) &&
      typeof(env$params$objective) == 'closure') {
@@ -113,21 +113,21 @@ check.custom.obj <- function(env = parent.frame()) {
check.custom.eval <- function(env = parent.frame()) {
  if (!is.null(env$params[['eval_metric']]) && !is.null(env$feval))
    stop("Setting evaluation metrics in 'params' and 'feval' at the same time is not allowed")
  if (!is.null(env$feval) && typeof(env$feval) != 'closure')
    stop("'feval' must be a function")
  # handle a situation when custom eval function was provided through params
  if (!is.null(env$params[['eval_metric']]) &&
      typeof(env$params$eval_metric) == 'closure') {
    env$feval <- env$params$eval_metric
    env$params$eval_metric <- NULL
  }
  # require maximize to be set when custom feval and early stopping are used together
  if (!is.null(env$feval) &&
      is.null(env$maximize) && (
        !is.null(env$early_stopping_rounds) ||
        has.callbacks(env$callbacks, 'cb.early.stop')))
    stop("Please set 'maximize' to indicate whether the evaluation metric needs to be maximized or not")
}
@@ -154,15 +154,15 @@ xgb.iter.update <- function(booster_handle, dtrain, iter, obj = NULL) {
# Evaluate one iteration.
# Returns a named vector of evaluation metrics
# with the names in a 'datasetname-metricname' format.
xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {
  if (!identical(class(booster_handle), "xgb.Booster.handle"))
    stop("class of booster_handle must be xgb.Booster.handle")
  if (length(watchlist) == 0)
    return(NULL)
  evnames <- names(watchlist)
  if (is.null(feval)) {
    msg <- .Call(XGBoosterEvalOneIter_R, booster_handle, as.integer(iter), watchlist, as.list(evnames))
@@ -189,7 +189,7 @@ xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {
# Generates random (stratified if needed) CV folds
generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
  # cannot do it for rank
  if (exists('objective', where = params) &&
      is.character(params$objective) &&
@@ -209,13 +209,14 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
  if (exists('objective', where = params) &&
      is.character(params$objective)) {
    # If 'objective' provided in params, assume that y is a classification label
-   # unless objective is reg:linear
-   if (params$objective != 'reg:linear')
+   # unless objective is reg:squarederror
+   if (params$objective != 'reg:squarederror')
      y <- factor(y)
  } else {
-   # If no 'objective' given in params, it means that user either wants to use
-   # the default 'reg:linear' objective or has provided a custom obj function.
-   # Here, assume classification setting when y has 5 or less unique values:
+   # If no 'objective' given in params, it means that user either wants to
+   # use the default 'reg:squarederror' objective or has provided a custom
+   # obj function. Here, assume classification setting when y has 5 or less
+   # unique values:
    if (length(unique(y)) <= 5)
      y <- factor(y)
  }
@@ -293,22 +294,22 @@ xgb.createFolds <- function(y, k = 10)
#
#' Deprecation notices.
#'
#' At this time, some of the parameter names were changed in order to make the code style more uniform.
#' The deprecated parameters would be removed in the next release.
#'
#' To see all the current deprecated and new parameters, check the \code{xgboost:::depr_par_lut} table.
#'
#' A deprecation warning is shown when any of the deprecated parameters is used in a call.
#' An additional warning is shown when there was a partial match to a deprecated parameter
#' (as R is able to partially match parameter names).
#'
#' @name xgboost-deprecated
NULL
# Lookup table for the deprecated parameters bookkeeping
depr_par_lut <- matrix(c(
  'print.every.n', 'print_every_n',
  'early.stop.round', 'early_stopping_rounds',
  'training.data', 'data',
  'with.stats', 'with_stats',


@@ -1,12 +1,12 @@
#' Cross Validation
#'
#' The cross validation function of xgboost
#'
#' @param params the list of parameters. Commonly used ones are:
#' \itemize{
#'   \item \code{objective} objective function, common ones are
#'   \itemize{
-#'     \item \code{reg:linear} linear regression
+#'     \item \code{reg:squarederror} Regression with squared loss
#'     \item \code{binary:logistic} logistic regression for classification
#'   }
#'   \item \code{eta} step size of each boosting step
@@ -18,12 +18,12 @@
#'   See also demo/ for walkthrough example in R.
#' @param data takes an \code{xgb.DMatrix}, \code{matrix}, or \code{dgCMatrix} as the input.
#' @param nrounds the max number of iterations
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label vector of response values. Should be provided only when data is an R-matrix.
#' @param missing is only used when input is a dense matrix. By default is set to NA, which means
#'   that NA values should be considered as 'missing' by the algorithm.
#'   Sometimes, 0 or other extreme value might be used to represent missing values.
#' @param prediction A logical value indicating whether to return the test fold predictions
#'   from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param metrics, list of evaluation metrics to be used in cross validation,
@@ -37,22 +37,22 @@
#'   \item \code{aucpr} Area under PR curve
#'   \item \code{merror} Exact matching error, used to evaluate multi-class classification
#' }
#' @param obj customized objective function. Returns gradient and second order
#'   gradient with given prediction and dtrain.
#' @param feval custimized evaluation function. Returns
#'   \code{list(metric='metric-name', value='metric-value')} with given
#'   prediction and dtrain.
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
#'   by the values of outcome labels.
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#'   (each element must be a vector of test fold's indices). When folds are supplied,
#'   the \code{nfold} and \code{stratified} parameters are ignored.
#' @param verbose \code{boolean}, print the statistics during the process
#' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
#'   Default is 1 which means all messages are printed. This parameter is passed to the
#'   \code{\link{cb.print.evaluation}} callback.
#' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
#'   If set to an integer \code{k}, training with a validation set will stop if the performance
#'   doesn't improve for \code{k} rounds.
#'   Setting this parameter engages the \code{\link{cb.early.stop}} callback.
#' @param maximize If \code{feval} and \code{early_stopping_rounds} are set,
@@ -60,46 +60,46 @@
#'   When it is \code{TRUE}, it means the larger the evaluation score the better.
#'   This parameter is passed to the \code{\link{cb.early.stop}} callback.
#' @param callbacks a list of callback functions to perform various task during boosting.
#'   See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
#'   parameters' values. User can provide either existing or their own callback methods in order
#'   to customize the training process.
#' @param ... other parameters to pass to \code{params}.
#'
#' @details
#' The original sample is randomly partitioned into \code{nfold} equal size subsamples.
#'
#' Of the \code{nfold} subsamples, a single subsample is retained as the validation data for testing the model, and the remaining \code{nfold - 1} subsamples are used as training data.
#'
#' The cross-validation process is then repeated \code{nrounds} times, with each of the \code{nfold} subsamples used exactly once as the validation data.
#'
#' All observations are used for both training and validation.
#'
#' Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation}
#'
#' @return
#' An object of class \code{xgb.cv.synchronous} with the following elements:
#' \itemize{
#'   \item \code{call} a function call.
#'   \item \code{params} parameters that were passed to the xgboost library. Note that it does not
#'     capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
#'   \item \code{callbacks} callback functions that were either automatically assigned or
#'     explicitly passed.
#'   \item \code{evaluation_log} evaluation history storead as a \code{data.table} with the
#'     first column corresponding to iteration number and the rest corresponding to the
#'     CV-based evaluation means and standard deviations for the training and test CV-sets.
#'     It is created by the \code{\link{cb.evaluation.log}} callback.
#'   \item \code{niter} number of boosting iterations.
#'   \item \code{nfeatures} number of features in training data.
#'   \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
#'     parameter or randomly generated.
#'   \item \code{best_iteration} iteration number with the best evaluation metric value
#'     (only available with early stopping).
#'   \item \code{best_ntreelimit} the \code{ntreelimit} value corresponding to the best iteration,
#'     which could further be used in \code{predict} method
#'     (only available with early stopping).
#'   \item \code{pred} CV prediction values available when \code{prediction} is set.
#'     It is either vector or matrix (see \code{\link{cb.cv.predict}}).
#'   \item \code{models} a liost of the CV folds' models. It is only available with the explicit
#'     setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
#' }
#'
@@ -110,32 +110,32 @@
#'                max_depth = 3, eta = 1, objective = "binary:logistic")
#' print(cv)
#' print(cv, verbose=TRUE)
#'
#' @export
xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NA,
                   prediction = FALSE, showsd = TRUE, metrics=list(),
                   obj = NULL, feval = NULL, stratified = TRUE, folds = NULL,
                   verbose = TRUE, print_every_n=1L,
                   early_stopping_rounds = NULL, maximize = NULL, callbacks = list(), ...) {
  check.deprecation(...)
  params <- check.booster.params(params, ...)
  # TODO: should we deprecate the redundant 'metrics' parameter?
  for (m in metrics)
    params <- c(params, list("eval_metric" = m))
  check.custom.obj()
  check.custom.eval()
  #if (is.null(params[['eval_metric']]) && is.null(feval))
  #  stop("Either 'eval_metric' or 'feval' must be provided for CV")
  # Check the labels
  if ( (inherits(data, 'xgb.DMatrix') && is.null(getinfo(data, 'label'))) ||
       (!inherits(data, 'xgb.DMatrix') && is.null(label)))
    stop("Labels must be provided for CV either through xgb.DMatrix, or through 'label=' when 'data' is matrix")
  # CV folds
  if(!is.null(folds)) {
    if(!is.list(folds) || length(folds) < 2)
@@ -146,7 +146,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
      stop("'nfold' must be > 1")
    folds <- generate.cv.folds(nfold, nrow(data), stratified, label, params)
  }
  # Potential TODO: sequential CV
  #if (strategy == 'sequential')
  #  stop('Sequential CV strategy is not yet implemented')
@@ -166,7 +166,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  stop_condition <- FALSE
  if (!is.null(early_stopping_rounds) &&
      !has.callbacks(callbacks, 'cb.early.stop')) {
    callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
                                                 maximize = maximize, verbose = verbose))
  }
  # CV-predictions callback
@@ -177,7 +177,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  # Sort the callbacks into categories
  cb <- categorize.callbacks(callbacks)
  # create the booster-folds
  dall <- xgb.get.DMatrix(data, label, missing)
  bst_folds <- lapply(seq_along(folds), function(k) {
@@ -197,12 +197,12 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  # those are fixed for CV (no training continuation)
  begin_iteration <- 1
  end_iteration <- nrounds
  # synchronous CV boosting: run CV folds' models within each iteration
  for (iteration in begin_iteration:end_iteration) {
    for (f in cb$pre_iter) f()
    msg <- lapply(bst_folds, function(fd) {
      xgb.iter.update(fd$bst, fd$dtrain, iteration - 1, obj)
      xgb.iter.eval(fd$bst, fd$watchlist, iteration - 1, feval)
@@ -210,9 +210,9 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
    msg <- simplify2array(msg)
    bst_evaluation <- rowMeans(msg)
    bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2)
    for (f in cb$post_iter) f()
    if (stop_condition) break
  }
  for (f in cb$finalize) f(finalize = TRUE)
@@ -236,17 +236,17 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
#' Print xgb.cv result
#'
#' Prints formatted results of \code{xgb.cv}.
#'
#' @param x an \code{xgb.cv.synchronous} object
#' @param verbose whether to print detailed data
#' @param ... passed to \code{data.table.print}
#'
#' @details
#' When not verbose, it would only print the evaluation results,
#' including the best iteration (when available).
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
@@ -254,13 +254,13 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
#'                eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' print(cv)
#' print(cv, verbose=TRUE)
#'
#' @rdname print.xgb.cv
#' @method print xgb.cv.synchronous
#' @export
print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
  cat('##### xgb.cv ', length(x$folds), '-folds\n', sep = '')
  if (verbose) {
    if (!is.null(x$call)) {
      cat('call:\n  ')
@@ -268,8 +268,8 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
    }
    if (!is.null(x$params)) {
      cat('params (as set within xgb.cv):\n')
      cat( '  ',
           paste(names(x$params),
                 paste0('"', unlist(x$params), '"'),
                 sep = ' = ', collapse = ', '), '\n', sep = '')
    }
@@ -280,9 +280,9 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
        print(x)
      })
    }
    for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
      if (is.null(x[[n]]))
        next
      cat(n, ': ', x[[n]], '\n', sep = '')
    }
@@ -293,10 +293,10 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
    }
  }
  if (verbose)
    cat('evaluation_log:\n')
  print(x$evaluation_log, row.names = FALSE, ...)
  if (!is.null(x$best_iteration)) {
    cat('Best iteration:\n')
    print(x$evaluation_log[x$best_iteration], row.names = FALSE, ...)


@@ -42,7 +42,7 @@
#' \itemize{
#'   \item \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. The default objective options are below:
#'   \itemize{
-#'     \item \code{reg:linear} linear regression (Default).
+#'     \item \code{reg:squarederror} Regression with squared loss (Default).
#'     \item \code{reg:logistic} logistic regression.
#'     \item \code{binary:logistic} logistic regression for binary classification. Output probability.
#'     \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.


@@ -16,7 +16,7 @@ xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
\itemize{
  \item \code{objective} objective function, common ones are
  \itemize{
-    \item \code{reg:linear} linear regression
+    \item \code{reg:squarederror} Regression with squared loss.
    \item \code{binary:logistic} logistic regression for classification
  }
  \item \code{eta} step size of each boosting step


@@ -56,7 +56,7 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
\itemize{
  \item \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. The default objective options are below:
  \itemize{
-    \item \code{reg:linear} linear regression (Default).
+    \item \code{reg:squarederror} Regression with squared loss (Default).
    \item \code{reg:logistic} logistic regression.
    \item \code{binary:logistic} logistic regression for binary classification. Output probability.
    \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
@@ -210,7 +210,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
watchlist <- list(train = dtrain, eval = dtest)
## A simple xgb.train example:
-param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
              objective = "binary:logistic", eval_metric = "auc")
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
@@ -231,12 +231,12 @@ evalerror <- function(preds, dtrain) {
# These functions could be used by passing them either:
#  as 'objective' and 'eval_metric' parameters in the params list:
-param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
              objective = logregobj, eval_metric = evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)
# or through the ... arguments:
-param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2)
+param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
                 objective = logregobj, eval_metric = evalerror)
@@ -246,7 +246,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
## An xgb.train example of using variable learning rates at each iteration:
-param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
              objective = "binary:logistic", eval_metric = "auc")
my_etas <- list(eta = c(0.5, 0.1))
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,


@@ -10,12 +10,12 @@ test_label <- agaricus.test$label[1:100]
test_that("xgb.DMatrix: basic construction", {
  # from sparse matrix
  dtest1 <- xgb.DMatrix(test_data, label=test_label)
  # from dense matrix
  dtest2 <- xgb.DMatrix(as.matrix(test_data), label=test_label)
  expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label'))
  expect_equal(dim(dtest1), dim(dtest2))
  #from dense integer matrix
  int_data <- as.matrix(test_data)
  storage.mode(int_data) <- "integer"
@@ -33,7 +33,7 @@ test_that("xgb.DMatrix: saving, loading", {
  expect_output(dtest3 <- xgb.DMatrix(tmp_file, silent = TRUE), NA)
  unlink(tmp_file)
  expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label'))
  # from a libsvm text file
  tmp <- c("0 1:1 2:1","1 3:1","0 1:1")
  tmp_file <- 'tmp.libsvm'
@@ -49,7 +49,7 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
  expect_true(setinfo(dtest, 'label', test_label))
  labels <- getinfo(dtest, 'label')
  expect_equal(test_label, getinfo(dtest, 'label'))
  expect_true(length(getinfo(dtest, 'weight')) == 0)
  expect_true(length(getinfo(dtest, 'base_margin')) == 0)
@@ -57,10 +57,10 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
  expect_true(setinfo(dtest, 'base_margin', test_label))
  expect_true(setinfo(dtest, 'group', c(50,50)))
  expect_error(setinfo(dtest, 'group', test_label))
  # providing character values will give a warning
  expect_warning( setinfo(dtest, 'weight', rep('a', nrow(test_data))) )
  # any other label should error
  expect_error(setinfo(dtest, 'asdf', test_label))
})
@@ -71,7 +71,7 @@ test_that("xgb.DMatrix: slice, dim", {
  dsub1 <- slice(dtest, 1:42)
  expect_equal(nrow(dsub1), 42)
  expect_equal(ncol(dsub1), ncol(test_data))
  dsub2 <- dtest[1:42,]
  expect_equal(dim(dtest), dim(test_data))
  expect_equal(getinfo(dsub1, 'label'), getinfo(dsub2, 'label'))


@@ -98,7 +98,7 @@ test_that("SHAP contribution values are not NAN", {
  fit <- xgboost(
    verbose = 0,
    params = list(
-     objective = "reg:linear",
+     objective = "reg:squarederror",
      eval_metric = "rmse"),
    data = as.matrix(subset(d, fold == 2)[, ivs]),
    label = subset(d, fold == 2)$y,


@@ -6,9 +6,9 @@ Using XGBoost for regression is very similar to using it for binary classificati
The dataset we used is the [computer hardware dataset from UCI repository](https://archive.ics.uci.edu/ml/datasets/Computer+Hardware). The demo for regression is almost the same as the [binary classification demo](../binary_classification), except a little difference in general parameter:
```
# General parameter
-# this is the only difference with classification, use reg:linear to do linear regression
+# this is the only difference with classification, use reg:squarederror to do linear regression
# when labels are in [0,1] we can also use reg:logistic
-objective = reg:linear
+objective = reg:squarederror
...
```


@@ -1,9 +1,9 @@
# General Parameters, see comment for each definition
# choose the tree booster, can also change to gblinear
booster = gbtree
-# this is the only difference with classification, use reg:linear to do linear classification
+# this is the only difference with classification, use reg:squarederror to do linear classification
# when labels are in [0,1] we can also use reg:logistic
-objective = reg:linear
+objective = reg:squarederror
# Tree Booster Parameters
# step size shrinkage


@@ -1,17 +1,17 @@
# General Parameters, see comment for each definition
# choose the tree booster, can also change to gblinear
booster = gbtree
-# this is the only difference with classification, use reg:linear to do linear classification
+# this is the only difference with classification, use reg:squarederror to do linear classification
# when labels are in [0,1] we can also use reg:logistic
-objective = reg:linear
+objective = reg:squarederror
# Tree Booster Parameters
# step size shrinkage
eta = 1.0
# minimum loss reduction required to make a further partition
gamma = 1.0
# minimum sum of instance weight(hessian) needed in a child
min_child_weight = 1
# maximum depth of a tree
max_depth = 5
@@ -20,11 +20,10 @@ base_score = 2001
# the number of round to do boosting
num_round = 100
# 0 means do not save any model except the final round model
save_period = 0
# The path of training data
data = "yearpredMSD.libsvm.train"
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
eval[test] = "yearpredMSD.libsvm.test"
# The path of test data
#test:data = "yearpredMSD.libsvm.test"


@@ -92,7 +92,7 @@ Most of the objective functions implemented in XGBoost can be run on GPU. Follo
+-----------------+-------------+
| Objectives      | GPU support |
+-----------------+-------------+
-| reg:linear      | |tick|      |
+| reg:squarederror| |tick|      |
+-----------------+-------------+
| reg:logistic    | |tick|      |
+-----------------+-------------+


@@ -293,9 +293,9 @@ Learning Task Parameters
************************
Specify the learning task and the corresponding learning objective. The objective options are below:
-* ``objective`` [default=reg:linear]
+* ``objective`` [default=reg:squarederror]
-  - ``reg:linear``: linear regression
+  - ``reg:squarederror``: regression with squared loss
  - ``reg:logistic``: logistic regression
  - ``binary:logistic``: logistic regression for binary classification, output probability
  - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation


@@ -36,7 +36,7 @@ The following parameters must be set to enable random forest training.
Other parameters should be set in a similar way they are set for gradient boosting. For
-instance, ``objective`` will typically be ``reg:linear`` for regression and
+instance, ``objective`` will typically be ``reg:squarederror`` for regression and
``binary:logistic`` for classification, ``lambda`` should be set according to a desired
regularization weight, etc.
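As a rough sketch of that set-up (the random-forest-specific switches discussed earlier in the tutorial are assumed to be configured separately and are not shown):

```python
# Task-level parameters referred to in the paragraph above; values are illustrative.
params = {
    "objective": "reg:squarederror",  # or "binary:logistic" for classification
    "lambda": 1.0,                    # L2 regularization weight, chosen per problem
}
```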


@@ -24,8 +24,8 @@ private[spark] trait LearningTaskParams extends Params {
  /**
   * Specify the learning task and the corresponding learning objective.
-  * options: reg:linear, reg:logistic, binary:logistic, binary:logitraw, count:poisson,
-  * multi:softmax, multi:softprob, rank:pairwise, reg:gamma. default: reg:linear
+  * options: reg:squarederror, reg:logistic, binary:logistic, binary:logitraw, count:poisson,
+  * multi:softmax, multi:softprob, rank:pairwise, reg:gamma. default: reg:squarederror
   */
  final val objective = new Param[String](this, "objective", "objective function used for " +
    s"training, options: {${LearningTaskParams.supportedObjective.mkString(",")}",
@@ -94,12 +94,12 @@ private[spark] trait LearningTaskParams extends Params {
  final def getMaximizeEvaluationMetrics: Boolean = $(maximizeEvaluationMetrics)
-  setDefault(objective -> "reg:linear", baseScore -> 0.5,
+  setDefault(objective -> "reg:squarederror", baseScore -> 0.5,
    trainTestRatio -> 1.0, numEarlyStoppingRounds -> 0)
}
private[spark] object LearningTaskParams {
-  val supportedObjective = HashSet("reg:linear", "reg:logistic", "binary:logistic",
+  val supportedObjective = HashSet("reg:squarederror", "reg:logistic", "binary:logistic",
    "binary:logitraw", "count:poisson", "multi:softmax", "multi:softprob", "rank:pairwise",
    "rank:ndcg", "rank:map", "reg:gamma", "reg:tweedie")


@@ -96,7 +96,7 @@ class PersistenceSuite extends FunSuite with PerTest with BeforeAndAfterAll {
    val testDM = new DMatrix(Regression.test.iterator)
    val paramMap = Map("eta" -> "0.1", "max_depth" -> "6", "silent" -> "1",
-     "objective" -> "reg:linear", "num_round" -> "10", "num_workers" -> numWorkers)
+     "objective" -> "reg:squarederror", "num_round" -> "10", "num_workers" -> numWorkers)
    val xgbr = new XGBoostRegressor(paramMap)
    val xgbrPath = new File(tempDir, "xgbr").getPath
    xgbr.write.overwrite().save(xgbrPath)


@@ -36,7 +36,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
      "eta" -> "1",
      "max_depth" -> "6",
      "silent" -> "1",
-     "objective" -> "reg:linear")
+     "objective" -> "reg:squarederror")
    val model1 = ScalaXGBoost.train(trainingDM, paramMap, round)
    val prediction1 = model1.predict(testDM)
@@ -69,7 +69,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
      "eta" -> "1",
      "max_depth" -> "6",
      "silent" -> "1",
-     "objective" -> "reg:linear",
+     "objective" -> "reg:squarederror",
      "num_round" -> round,
      "num_workers" -> numWorkers)
@@ -80,7 +80,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
      .setEta(1)
      .setMaxDepth(6)
      .setSilent(1)
-     .setObjective("reg:linear")
+     .setObjective("reg:squarederror")
      .setNumRound(round)
      .setNumWorkers(numWorkers)
      .fit(trainingDF)
@@ -108,7 +108,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
  test("use weight") {
    val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
-     "objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
+     "objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
    val getWeightFromId = udf({id: Int => if (id == 0) 1.0f else 0.001f}, DataTypes.FloatType)
    val trainingDF = buildDataFrame(Regression.train)
@@ -123,7 +123,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
  test("test predictionLeaf") {
    val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
-     "objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
+     "objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
    val training = buildDataFrame(Regression.train)
    val testDF = buildDataFrame(Regression.test)
    val groundTruth = testDF.count()
@@ -137,7 +137,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
  test("test predictionLeaf with empty column name") {
    val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
-     "objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
+     "objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
    val training = buildDataFrame(Regression.train)
    val testDF = buildDataFrame(Regression.test)
    val xgb = new XGBoostRegressor(paramMap)
@@ -149,7 +149,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
  test("test predictionContrib") {
    val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
-     "objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
+     "objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
    val training = buildDataFrame(Regression.train)
    val testDF = buildDataFrame(Regression.test)
    val groundTruth = testDF.count()
@@ -163,7 +163,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
  test("test predictionContrib with empty column name") {
    val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
-     "objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
+     "objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
    val training = buildDataFrame(Regression.train)
    val testDF = buildDataFrame(Regression.test)
    val xgb = new XGBoostRegressor(paramMap)
@@ -175,7 +175,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
  test("test predictionLeaf and predictionContrib") {
    val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
-     "objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
+     "objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
    val training = buildDataFrame(Regression.train)
    val testDF = buildDataFrame(Regression.test)
    val groundTruth = testDF.count()


@@ -128,8 +128,8 @@ class RegLossObj : public ObjFunction {
// register the objective functions
DMLC_REGISTER_PARAMETER(RegLossParam);
-XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear")
-.describe("Linear regression.")
+XGBOOST_REGISTER_OBJECTIVE(SquaredLossRegression, "reg:squarederror")
+.describe("Regression with squared error.")
.set_body([]() { return new RegLossObj<LinearSquareLoss>(); });
XGBOOST_REGISTER_OBJECTIVE(LogisticRegression, "reg:logistic")
@@ -145,7 +145,13 @@ XGBOOST_REGISTER_OBJECTIVE(LogisticRaw, "binary:logitraw")
          "before logistic transformation.")
.set_body([]() { return new RegLossObj<LogisticRaw>(); });
-// Deprecated GPU functions
+// Deprecated functions
+XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear")
+.describe("Regression with squared error.")
+.set_body([]() {
+    LOG(WARNING) << "reg:linear is now deprecated in favor of reg:squarederror.";
+    return new RegLossObj<LinearSquareLoss>(); });
XGBOOST_REGISTER_OBJECTIVE(GPULinearRegression, "gpu:reg:linear")
.describe("Deprecated. Linear regression (computed on GPU).")
.set_body([]() {
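A quick way to see the effect of the alias registration above from the Python side (toy data; the warning text is the one emitted by the registration shown above):

```python
import numpy as np
import xgboost as xgb

X = np.random.rand(50, 4)
y = np.random.rand(50)
dtrain = xgb.DMatrix(X, label=y)

# The deprecated alias still trains, but should log
# "reg:linear is now deprecated in favor of reg:squarederror."
bst_old = xgb.train({'objective': 'reg:linear'}, dtrain, num_boost_round=1)

# The canonical spelling going forward:
bst_new = xgb.train({'objective': 'reg:squarederror'}, dtrain, num_boost_round=1)
```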


@@ -1,4 +1,5 @@
// Copyright by Contributors
+#include <gtest/gtest.h>
#include <xgboost/objective.h>
#include "../helpers.h"
@@ -6,7 +7,7 @@
TEST(Objective, UnknownFunction) {
  xgboost::ObjFunction* obj = nullptr;
  EXPECT_ANY_THROW(obj = xgboost::ObjFunction::Create("unknown_name"));
-  EXPECT_NO_THROW(obj = xgboost::ObjFunction::Create("reg:linear"));
+  EXPECT_NO_THROW(obj = xgboost::ObjFunction::Create("reg:squarederror"));
  if (obj) {
    delete obj;
  }

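The updated unit test only asserts that the new objective name can be created while an unknown name throws. A rough Python analogue of that check, assuming the standard xgboost Python package (the tiny DMatrix here is illustrative, not taken from the test suite):

import numpy as np
import xgboost as xgb

dtrain = xgb.DMatrix(np.ones((10, 2)), label=np.zeros(10))

try:
    # Mirrors EXPECT_ANY_THROW(ObjFunction::Create("unknown_name")).
    xgb.train({'objective': 'unknown_name'}, dtrain, num_boost_round=1)
except xgb.core.XGBoostError:
    print('unknown objective rejected')

# Mirrors EXPECT_NO_THROW(ObjFunction::Create("reg:squarederror")).
xgb.train({'objective': 'reg:squarederror'}, dtrain, num_boost_round=1)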

@ -1,12 +1,13 @@
/*! /*!
* Copyright 2017-2018 XGBoost contributors * Copyright 2017-2019 XGBoost contributors
*/ */
#include <gtest/gtest.h>
#include <xgboost/objective.h> #include <xgboost/objective.h>
#include "../helpers.h" #include "../helpers.h"
TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) { TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:linear"); xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:squarederror");
std::vector<std::pair<std::string, std::string> > args; std::vector<std::pair<std::string, std::string> > args;
obj->Configure(args); obj->Configure(args);
CheckObjFunction(obj, CheckObjFunction(obj,


@ -132,15 +132,16 @@ def run_suite(param, num_rounds=10, select_datasets=None, scale_features=False):
Run the given parameters on a range of datasets. Objective and eval metric will be automatically set Run the given parameters on a range of datasets. Objective and eval metric will be automatically set
""" """
datasets = [ datasets = [
Dataset("Boston", get_boston, "reg:linear", "rmse"), Dataset("Boston", get_boston, "reg:squarederror", "rmse"),
Dataset("Digits", get_digits, "multi:softmax", "merror"), Dataset("Digits", get_digits, "multi:softmax", "merror"),
Dataset("Cancer", get_cancer, "binary:logistic", "error"), Dataset("Cancer", get_cancer, "binary:logistic", "error"),
Dataset("Sparse regression", get_sparse, "reg:linear", "rmse"), Dataset("Sparse regression", get_sparse, "reg:squarederror", "rmse"),
Dataset("Sparse regression with weights", get_sparse_weights, Dataset("Sparse regression with weights", get_sparse_weights,
"reg:linear", "rmse", has_weights=True), "reg:squarederror", "rmse", has_weights=True),
Dataset("Small weights regression", get_small_weights, Dataset("Small weights regression", get_small_weights,
"reg:linear", "rmse", has_weights=True), "reg:squarederror", "rmse", has_weights=True),
Dataset("Boston External Memory", get_boston, "reg:linear", "rmse", Dataset("Boston External Memory", get_boston,
"reg:squarederror", "rmse",
use_external_memory=True) use_external_memory=True)
] ]


@ -38,7 +38,7 @@ class TestBasic(unittest.TestCase):
def test_basic(self): def test_basic(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
param = {'max_depth': 2, 'eta': 1, 'silent': 1, param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
# specify validations set to watch performance # specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')] watchlist = [(dtest, 'eval'), (dtrain, 'train')]
@ -85,7 +85,7 @@ class TestBasic(unittest.TestCase):
def test_record_results(self): def test_record_results(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
param = {'max_depth': 2, 'eta': 1, 'silent': 1, param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
# specify validations set to watch performance # specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')] watchlist = [(dtest, 'eval'), (dtrain, 'train')]
@ -102,7 +102,7 @@ class TestBasic(unittest.TestCase):
def test_multiclass(self): def test_multiclass(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'num_class': 2} param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
# specify validations set to watch performance # specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')] watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2 num_round = 2
@ -273,7 +273,7 @@ class TestBasic(unittest.TestCase):
def test_cv(self): def test_cv(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train') dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
params = {'max_depth': 2, 'eta': 1, 'silent': 1, params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
# return np.ndarray # return np.ndarray
@ -283,7 +283,7 @@ class TestBasic(unittest.TestCase):
def test_cv_no_shuffle(self): def test_cv_no_shuffle(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train') dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
params = {'max_depth': 2, 'eta': 1, 'silent': 1, params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
# return np.ndarray # return np.ndarray
@ -294,7 +294,7 @@ class TestBasic(unittest.TestCase):
def test_cv_explicit_fold_indices(self): def test_cv_explicit_fold_indices(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train') dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
'binary:logistic'} 'binary:logistic'}
folds = [ folds = [
# Train Test # Train Test
@ -310,7 +310,7 @@ class TestBasic(unittest.TestCase):
def test_cv_explicit_fold_indices_labels(self): def test_cv_explicit_fold_indices_labels(self):
params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
'reg:linear'} 'reg:squarederror'}
N = 100 N = 100
F = 3 F = 3
dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N)) dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))

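Throughout the Python tests, 'silent': 1 is replaced by 'verbosity': 0 and 'silent': False by 'verbosity': 1. The two parameters are not numerically interchangeable: silent was a 0/1 flag, while verbosity is a logging level. A small sketch of the mapping applied in these edits (plain dictionaries, no training involved):

# verbosity levels: 0 = silent, 1 = warnings (default), 2 = info, 3 = debug.
old_params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
new_params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective': 'binary:logistic'}

# silent=0/False (print messages) corresponds to the default verbosity=1.
old_verbose = {'max_depth': 5, 'booster': 'dart', 'silent': False}
new_verbose = {'max_depth': 5, 'booster': 'dart', 'verbosity': 1}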

@ -11,7 +11,7 @@ rng = np.random.RandomState(1994)
class TestModels(unittest.TestCase): class TestModels(unittest.TestCase):
def test_glm(self): def test_glm(self):
param = {'silent': 1, 'objective': 'binary:logistic', param = {'verbosity': 0, 'objective': 'binary:logistic',
'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1, 'nthread': 1} 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1, 'nthread': 1}
watchlist = [(dtest, 'eval'), (dtrain, 'train')] watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 4 num_round = 4
@ -26,7 +26,7 @@ class TestModels(unittest.TestCase):
def test_dart(self): def test_dart(self):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
dtest = xgb.DMatrix(dpath + 'agaricus.txt.test') dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
param = {'max_depth': 5, 'objective': 'binary:logistic', 'booster': 'dart', 'silent': False} param = {'max_depth': 5, 'objective': 'binary:logistic', 'booster': 'dart', 'verbosity': 1}
# specify validations set to watch performance # specify validations set to watch performance
watchlist = [(dtest, 'eval'), (dtrain, 'train')] watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2 num_round = 2
@ -51,7 +51,7 @@ class TestModels(unittest.TestCase):
# check whether sample_type and normalize_type work # check whether sample_type and normalize_type work
num_round = 50 num_round = 50
param['silent'] = True param['verbosity'] = 0
param['learning_rate'] = 0.1 param['learning_rate'] = 0.1
param['rate_drop'] = 0.1 param['rate_drop'] = 0.1
preds_list = [] preds_list = []
@ -74,7 +74,8 @@ class TestModels(unittest.TestCase):
# learning_rates as a list # learning_rates as a list
# init eta with 0 to check whether learning_rates work # init eta with 0 to check whether learning_rates work
param = {'max_depth': 2, 'eta': 0, 'silent': 1, 'objective': 'binary:logistic'} param = {'max_depth': 2, 'eta': 0, 'verbosity': 0,
'objective': 'binary:logistic'}
evals_result = {} evals_result = {}
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.8, 0.7, 0.6, 0.5], bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.8, 0.7, 0.6, 0.5],
evals_result=evals_result) evals_result=evals_result)
@ -84,7 +85,8 @@ class TestModels(unittest.TestCase):
assert eval_errors[0] > eval_errors[-1] assert eval_errors[0] > eval_errors[-1]
# init learning_rate with 0 to check whether learning_rates work # init learning_rate with 0 to check whether learning_rates work
param = {'max_depth': 2, 'learning_rate': 0, 'silent': 1, 'objective': 'binary:logistic'} param = {'max_depth': 2, 'learning_rate': 0, 'verbosity': 0,
'objective': 'binary:logistic'}
evals_result = {} evals_result = {}
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.8, 0.7, 0.6, 0.5], bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.8, 0.7, 0.6, 0.5],
evals_result=evals_result) evals_result=evals_result)
@ -94,7 +96,7 @@ class TestModels(unittest.TestCase):
assert eval_errors[0] > eval_errors[-1] assert eval_errors[0] > eval_errors[-1]
# check if learning_rates override default value of eta/learning_rate # check if learning_rates override default value of eta/learning_rate
param = {'max_depth': 2, 'silent': 1, 'objective': 'binary:logistic'} param = {'max_depth': 2, 'verbosity': 0, 'objective': 'binary:logistic'}
evals_result = {} evals_result = {}
bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0, 0, 0, 0], bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0, 0, 0, 0],
evals_result=evals_result) evals_result=evals_result)
@ -111,7 +113,7 @@ class TestModels(unittest.TestCase):
assert isinstance(bst, xgb.core.Booster) assert isinstance(bst, xgb.core.Booster)
def test_custom_objective(self): def test_custom_objective(self):
param = {'max_depth': 2, 'eta': 1, 'silent': 1} param = {'max_depth': 2, 'eta': 1, 'verbosity': 0}
watchlist = [(dtest, 'eval'), (dtrain, 'train')] watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 2 num_round = 2
@ -152,7 +154,8 @@ class TestModels(unittest.TestCase):
def test_multi_eval_metric(self): def test_multi_eval_metric(self):
watchlist = [(dtest, 'eval'), (dtrain, 'train')] watchlist = [(dtest, 'eval'), (dtrain, 'train')]
param = {'max_depth': 2, 'eta': 0.2, 'silent': 1, 'objective': 'binary:logistic'} param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 0,
'objective': 'binary:logistic'}
param['eval_metric'] = ["auc", "logloss", 'error'] param['eval_metric'] = ["auc", "logloss", 'error']
evals_result = {} evals_result = {}
bst = xgb.train(param, dtrain, 4, watchlist, evals_result=evals_result) bst = xgb.train(param, dtrain, 4, watchlist, evals_result=evals_result)
@ -161,7 +164,7 @@ class TestModels(unittest.TestCase):
assert set(evals_result['eval'].keys()) == {'auc', 'error', 'logloss'} assert set(evals_result['eval'].keys()) == {'auc', 'error', 'logloss'}
def test_fpreproc(self): def test_fpreproc(self):
param = {'max_depth': 2, 'eta': 1, 'silent': 1, param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
num_round = 2 num_round = 2
@ -175,7 +178,7 @@ class TestModels(unittest.TestCase):
metrics={'auc'}, seed=0, fpreproc=fpreproc) metrics={'auc'}, seed=0, fpreproc=fpreproc)
def test_show_stdv(self): def test_show_stdv(self):
param = {'max_depth': 2, 'eta': 1, 'silent': 1, param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
num_round = 2 num_round = 2
xgb.cv(param, dtrain, num_round, nfold=5, xgb.cv(param, dtrain, num_round, nfold=5,


@ -52,7 +52,7 @@ class TestEarlyStopping(unittest.TestCase):
X = digits['data'] X = digits['data']
y = digits['target'] y = digits['target']
dm = xgb.DMatrix(X, label=y) dm = xgb.DMatrix(X, label=y)
params = {'max_depth': 2, 'eta': 1, 'silent': 1, params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,


@ -9,25 +9,25 @@ rng = np.random.RandomState(1337)
class TestEvalMetrics(unittest.TestCase): class TestEvalMetrics(unittest.TestCase):
xgb_params_01 = { xgb_params_01 = {
'silent': 1, 'verbosity': 0,
'nthread': 1, 'nthread': 1,
'eval_metric': 'error' 'eval_metric': 'error'
} }
xgb_params_02 = { xgb_params_02 = {
'silent': 1, 'verbosity': 0,
'nthread': 1, 'nthread': 1,
'eval_metric': ['error'] 'eval_metric': ['error']
} }
xgb_params_03 = { xgb_params_03 = {
'silent': 1, 'verbosity': 0,
'nthread': 1, 'nthread': 1,
'eval_metric': ['rmse', 'error'] 'eval_metric': ['rmse', 'error']
} }
xgb_params_04 = { xgb_params_04 = {
'silent': 1, 'verbosity': 0,
'nthread': 1, 'nthread': 1,
'eval_metric': ['error', 'rmse'] 'eval_metric': ['error', 'rmse']
} }


@ -18,7 +18,7 @@ class TestInteractionConstraints(unittest.TestCase):
X = np.column_stack((x1, x2, x3)) X = np.column_stack((x1, x2, x3))
dtrain = xgboost.DMatrix(X, label=y) dtrain = xgboost.DMatrix(X, label=y)
params = {'max_depth': 3, 'eta': 0.1, 'nthread': 2, 'silent': 1, params = {'max_depth': 3, 'eta': 0.1, 'nthread': 2, 'verbosity': 0,
'interaction_constraints': '[[0, 1]]'} 'interaction_constraints': '[[0, 1]]'}
num_boost_round = 100 num_boost_round = 100
# Fit a model that only allows interaction between x1 and x2 # Fit a model that only allows interaction between x1 and x2


@ -30,7 +30,7 @@ def xgb_get_weights(bst):
def assert_regression_result(results, tol): def assert_regression_result(results, tol):
regression_results = [r for r in results if regression_results = [r for r in results if
r["param"]["objective"] == "reg:linear"] r["param"]["objective"] == "reg:squarederror"]
for res in regression_results: for res in regression_results:
X = scale(res["dataset"].X, X = scale(res["dataset"].X,
with_mean=isinstance(res["dataset"].X, np.ndarray)) with_mean=isinstance(res["dataset"].X, np.ndarray))
@ -52,7 +52,7 @@ def assert_regression_result(results, tol):
# TODO: More robust classification tests # TODO: More robust classification tests
def assert_classification_result(results): def assert_classification_result(results):
classification_results = [r for r in results if classification_results = [r for r in results if
r["param"]["objective"] != "reg:linear"] r["param"]["objective"] != "reg:squarederror"]
for res in classification_results: for res in classification_results:
# Check accuracy is reasonable # Check accuracy is reasonable
assert res["eval"][-1] < 0.5, (res["dataset"].name, res["eval"][-1]) assert res["eval"][-1] < 0.5, (res["dataset"].name, res["eval"][-1])


@ -16,7 +16,8 @@ class TestTreesToDataFrame(unittest.TestCase):
def build_model(self, max_depth, num_round): def build_model(self, max_depth, num_round):
dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train') dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
param = {'max_depth': max_depth, 'objective': 'binary:logistic', 'silent': False} param = {'max_depth': max_depth, 'objective': 'binary:logistic',
'verbosity': 1}
num_round = num_round num_round = num_round
bst = xgb.train(param, dtrain, num_round) bst = xgb.train(param, dtrain, num_round)
return bst return bst


@ -51,7 +51,7 @@ class TestSHAP(unittest.TestCase):
def fn(max_depth, num_rounds): def fn(max_depth, num_rounds):
# train # train
params = {'max_depth': max_depth, 'eta': 1, 'silent': 1} params = {'max_depth': max_depth, 'eta': 1, 'verbosity': 0}
bst = xgb.train(params, dtrain, num_boost_round=num_rounds) bst = xgb.train(params, dtrain, num_boost_round=num_rounds)
# predict # predict


@ -4,7 +4,7 @@ from scipy.sparse import rand
rng = np.random.RandomState(1) rng = np.random.RandomState(1)
param = {'max_depth': 3, 'objective': 'binary:logistic', 'silent': 1} param = {'max_depth': 3, 'objective': 'binary:logistic', 'verbosity': 0}
def test_sparse_dmatrix_csr(): def test_sparse_dmatrix_csr():


@ -11,18 +11,18 @@ class TestTrainingContinuation(unittest.TestCase):
num_parallel_tree = 3 num_parallel_tree = 3
xgb_params_01 = { xgb_params_01 = {
'silent': 1, 'verbosity': 0,
'nthread': 1, 'nthread': 1,
} }
xgb_params_02 = { xgb_params_02 = {
'silent': 1, 'verbosity': 0,
'nthread': 1, 'nthread': 1,
'num_parallel_tree': num_parallel_tree 'num_parallel_tree': num_parallel_tree
} }
xgb_params_03 = { xgb_params_03 = {
'silent': 1, 'verbosity': 0,
'nthread': 1, 'nthread': 1,
'num_class': 5, 'num_class': 5,
'num_parallel_tree': num_parallel_tree 'num_parallel_tree': num_parallel_tree


@ -10,7 +10,8 @@ train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))
class TestTreeRegularization(unittest.TestCase): class TestTreeRegularization(unittest.TestCase):
def test_alpha(self): def test_alpha(self):
params = { params = {
'tree_method': 'exact', 'silent': 1, 'objective': 'reg:linear', 'tree_method': 'exact', 'verbosity': 0,
'objective': 'reg:squarederror',
'eta': 1, 'eta': 1,
'lambda': 0, 'lambda': 0,
'alpha': 0.1 'alpha': 0.1
@ -27,7 +28,8 @@ class TestTreeRegularization(unittest.TestCase):
def test_lambda(self): def test_lambda(self):
params = { params = {
'tree_method': 'exact', 'silent': 1, 'objective': 'reg:linear', 'tree_method': 'exact', 'verbosity': 0,
'objective': 'reg:squarederror',
'eta': 1, 'eta': 1,
'lambda': 1, 'lambda': 1,
'alpha': 0 'alpha': 0
@ -44,7 +46,8 @@ class TestTreeRegularization(unittest.TestCase):
def test_alpha_and_lambda(self): def test_alpha_and_lambda(self):
params = { params = {
'tree_method': 'exact', 'silent': 1, 'objective': 'reg:linear', 'tree_method': 'exact', 'verbosity': 1,
'objective': 'reg:squarederror',
'eta': 1, 'eta': 1,
'lambda': 1, 'lambda': 1,
'alpha': 0.1 'alpha': 0.1

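These regularization tests pair the renamed objective with explicit L1/L2 penalties on the exact tree method. A minimal sketch of that parameter combination, mirroring the single-row DMatrix the tests build (illustrative only):

import numpy as np
import xgboost as xgb

dtrain = xgb.DMatrix(np.array([[1.0]]), label=np.array([1.0]))

params = {
    'tree_method': 'exact', 'verbosity': 0,
    'objective': 'reg:squarederror',
    'eta': 1,
    'lambda': 1,   # L2 penalty on leaf weights
    'alpha': 0.1,  # L1 penalty on leaf weights
}
bst = xgb.train(params, dtrain, num_boost_round=1)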

@ -33,7 +33,7 @@ class TestUpdaters(unittest.TestCase):
'max_bin': [2, 256], 'max_bin': [2, 256],
'grow_policy': ['depthwise', 'lossguide'], 'grow_policy': ['depthwise', 'lossguide'],
'max_leaves': [64, 0], 'max_leaves': [64, 0],
'silent': [1]} 'verbosity': [0]}
for param in parameter_combinations(variable_param): for param in parameter_combinations(variable_param):
result = run_suite(param) result = run_suite(param)
assert_results_non_increasing(result, 1e-2) assert_results_non_increasing(result, 1e-2)
@ -45,7 +45,7 @@ class TestUpdaters(unittest.TestCase):
ag_param = {'max_depth': 2, ag_param = {'max_depth': 2,
'tree_method': 'hist', 'tree_method': 'hist',
'eta': 1, 'eta': 1,
'silent': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'objective': 'binary:logistic',
'eval_metric': 'auc'} 'eval_metric': 'auc'}
hist_res = {} hist_res = {}


@ -120,7 +120,7 @@ class TestPandas(unittest.TestCase):
def test_cv_as_pandas(self): def test_cv_as_pandas(self):
dm = xgb.DMatrix(dpath + 'agaricus.txt.train') dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
params = {'max_depth': 2, 'eta': 1, 'silent': 1, params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10) cv = xgb.cv(params, dm, num_boost_round=10, nfold=10)
@ -143,19 +143,19 @@ class TestPandas(unittest.TestCase):
u'train-error-mean', u'train-error-std']) u'train-error-mean', u'train-error-std'])
assert cv.columns.equals(exp) assert cv.columns.equals(exp)
params = {'max_depth': 2, 'eta': 1, 'silent': 1, params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': 'auc'} 'objective': 'binary:logistic', 'eval_metric': 'auc'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True) cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
assert 'eval_metric' in params assert 'eval_metric' in params
assert 'auc' in cv.columns[0] assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': ['auc']} 'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True) cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
assert 'eval_metric' in params assert 'eval_metric' in params
assert 'auc' in cv.columns[0] assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': ['auc']} 'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
as_pandas=True, early_stopping_rounds=1) as_pandas=True, early_stopping_rounds=1)
@ -163,19 +163,19 @@ class TestPandas(unittest.TestCase):
assert 'auc' in cv.columns[0] assert 'auc' in cv.columns[0]
assert cv.shape[0] < 10 assert cv.shape[0] < 10
params = {'max_depth': 2, 'eta': 1, 'silent': 1, params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
as_pandas=True, metrics='auc') as_pandas=True, metrics='auc')
assert 'auc' in cv.columns[0] assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic'} 'objective': 'binary:logistic'}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
as_pandas=True, metrics=['auc']) as_pandas=True, metrics=['auc'])
assert 'auc' in cv.columns[0] assert 'auc' in cv.columns[0]
params = {'max_depth': 2, 'eta': 1, 'silent': 1, params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
'objective': 'binary:logistic', 'eval_metric': ['auc']} 'objective': 'binary:logistic', 'eval_metric': ['auc']}
cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
as_pandas=True, metrics='error') as_pandas=True, metrics='error')


@ -603,7 +603,8 @@ def test_RFECV():
# Regression # Regression
X, y = load_boston(return_X_y=True) X, y = load_boston(return_X_y=True)
bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1, bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1,
n_estimators=10, n_jobs=1, objective='reg:linear', n_estimators=10, n_jobs=1,
objective='reg:squarederror',
random_state=0, verbosity=0) random_state=0, verbosity=0)
rfecv = RFECV( rfecv = RFECV(
estimator=bst, step=1, cv=3, scoring='neg_mean_squared_error') estimator=bst, step=1, cv=3, scoring='neg_mean_squared_error')
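The scikit-learn wrapper change above is the same rename passed through keyword arguments. A short sketch of the updated style with the sklearn API, using synthetic data rather than the Boston dataset from the test:

import numpy as np
import xgboost as xgb
from sklearn.model_selection import cross_val_score

rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = 2.0 * X[:, 0] + 0.1 * rng.randn(200)

reg = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=10,
                       learning_rate=0.1, n_jobs=1, verbosity=0, random_state=0)
scores = cross_val_score(reg, X, y, cv=3, scoring='neg_mean_squared_error')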