Deprecate `reg:linear` in favor of `reg:squarederror`. (#4267)

* Deprecate `reg:linear` in favor of `reg:squarederror`.
* Replace the use of `reg:linear`.
* Replace the use of `silent`.

This commit is contained in:
parent cf8d5b9b76
commit 29a1356669
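In user code the migration is a straight rename: `reg:linear` becomes `reg:squarederror`, and the `silent` flag gives way to `verbosity`. A minimal before/after sketch in R (the dataset choice is illustrative only, not part of this commit):

```
library(xgboost)
data(agaricus.train, package = "xgboost")

# Before (now deprecated):
# params <- list(objective = "reg:linear", silent = 1)

# After:
params <- list(objective = "reg:squarederror", verbosity = 0)
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               params = params, nrounds = 2)
```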
@@ -28,12 +28,12 @@ NVL <- function(x, val) {
# Merges booster params with whatever is provided in ...
# plus runs some checks
check.booster.params <- function(params, ...) {
  if (typeof(params) != "list")
    stop("params must be a list")

  # in R interface, allow for '.' instead of '_' in parameter names
  names(params) <- gsub("\\.", "_", names(params))

  # merge parameters from the params and the dots-expansion
  dot_params <- list(...)
  names(dot_params) <- gsub("\\.", "_", names(dot_params))
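As the context above shows, the R interface lets users write dotted parameter names and normalizes them to the underscore form the core library expects; a standalone sketch of that normalization (hypothetical parameter values):

```
params <- list(max.depth = 3, print.every.n = 10)
# the same gsub() call as in check.booster.params():
names(params) <- gsub("\\.", "_", names(params))
names(params)  # "max_depth"  "print_every_n"
```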
@@ -41,15 +41,15 @@ check.booster.params <- function(params, ...) {
              names(dot_params))) > 0)
    stop("Same parameters in 'params' and in the call are not allowed. Please check your 'params' list.")
  params <- c(params, dot_params)

  # providing a parameter multiple times makes sense only for 'eval_metric'
  name_freqs <- table(names(params))
  multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric')
  if (length(multi_names) > 0) {
    warning("The following parameters were provided multiple times:\n\t",
            paste(multi_names, collapse = ', '), "\n  Only the last value for each of them will be used.\n")
    # While xgboost internals would choose the last value for a multiple-times parameter,
    # enforce it here in R as well (b/c multi-parameters might be used further in R code,
    # and R takes the 1st value when multiple elements with the same name are present in a list).
    for (n in multi_names) {
      del_idx <- which(n == names(params))
@@ -57,25 +57,25 @@ check.booster.params <- function(params, ...) {
      params[[del_idx]] <- NULL
    }
  }

  # for multiclass, expect num_class to be set
  if (typeof(params[['objective']]) == "character" &&
      substr(NVL(params[['objective']], 'x'), 1, 6) == 'multi:' &&
      as.numeric(NVL(params[['num_class']], 0)) < 2) {
    stop("'num_class' > 1 parameter must be set for multiclass classification")
  }

  # monotone_constraints parser

  if (!is.null(params[['monotone_constraints']]) &&
      typeof(params[['monotone_constraints']]) != "character") {
    vec2str = paste(params[['monotone_constraints']], collapse = ',')
    vec2str = paste0('(', vec2str, ')')
    params[['monotone_constraints']] = vec2str
  }

  # interaction constraints parser (convert from list of column indices to string)
  if (!is.null(params[['interaction_constraints']]) &&
      typeof(params[['interaction_constraints']]) != "character"){
    # check input class
    if (class(params[['interaction_constraints']]) != 'list') stop('interaction_constraints should be class list')
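The two constraint parsers above serialize R objects into the string form the core library parses; a quick standalone sketch of the monotone case, mirroring the `vec2str` logic (the vector values are hypothetical):

```
mc <- c(1, -1, 0)  # per-feature monotonicity: increasing, decreasing, none
vec2str <- paste0("(", paste(mc, collapse = ","), ")")
vec2str  # "(1,-1,0)"
```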
@@ -96,10 +96,10 @@ check.booster.params <- function(params, ...) {
check.custom.obj <- function(env = parent.frame()) {
  if (!is.null(env$params[['objective']]) && !is.null(env$obj))
    stop("Setting objectives in 'params' and 'obj' at the same time is not allowed")

  if (!is.null(env$obj) && typeof(env$obj) != 'closure')
    stop("'obj' must be a function")

  # handle the case when custom objective function was provided through params
  if (!is.null(env$params[['objective']]) &&
      typeof(env$params$objective) == 'closure') {
@@ -113,21 +113,21 @@ check.custom.obj <- function(env = parent.frame()) {
check.custom.eval <- function(env = parent.frame()) {
  if (!is.null(env$params[['eval_metric']]) && !is.null(env$feval))
    stop("Setting evaluation metrics in 'params' and 'feval' at the same time is not allowed")

  if (!is.null(env$feval) && typeof(env$feval) != 'closure')
    stop("'feval' must be a function")

  # handle a situation when custom eval function was provided through params
  if (!is.null(env$params[['eval_metric']]) &&
      typeof(env$params$eval_metric) == 'closure') {
    env$feval <- env$params$eval_metric
    env$params$eval_metric <- NULL
  }

  # require maximize to be set when custom feval and early stopping are used together
  if (!is.null(env$feval) &&
      is.null(env$maximize) && (
        !is.null(env$early_stopping_rounds) ||
        has.callbacks(env$callbacks, 'cb.early.stop')))
    stop("Please set 'maximize' to indicate whether the evaluation metric needs to be maximized or not")
}
@@ -154,15 +154,15 @@ xgb.iter.update <- function(booster_handle, dtrain, iter, obj = NULL) {


# Evaluate one iteration.
# Returns a named vector of evaluation metrics
# with the names in a 'datasetname-metricname' format.
xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {
  if (!identical(class(booster_handle), "xgb.Booster.handle"))
    stop("class of booster_handle must be xgb.Booster.handle")

  if (length(watchlist) == 0)
    return(NULL)

  evnames <- names(watchlist)
  if (is.null(feval)) {
    msg <- .Call(XGBoosterEvalOneIter_R, booster_handle, as.integer(iter), watchlist, as.list(evnames))
@@ -189,7 +189,7 @@ xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {

# Generates random (stratified if needed) CV folds
generate.cv.folds <- function(nfold, nrows, stratified, label, params) {

  # cannot do it for rank
  if (exists('objective', where = params) &&
      is.character(params$objective) &&
@@ -209,13 +209,14 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
   if (exists('objective', where = params) &&
       is.character(params$objective)) {
     # If 'objective' provided in params, assume that y is a classification label
-    # unless objective is reg:linear
-    if (params$objective != 'reg:linear')
+    # unless objective is reg:squarederror
+    if (params$objective != 'reg:squarederror')
       y <- factor(y)
   } else {
-    # If no 'objective' given in params, it means that user either wants to use
-    # the default 'reg:linear' objective or has provided a custom obj function.
-    # Here, assume classification setting when y has 5 or less unique values:
+    # If no 'objective' given in params, it means that user either wants to
+    # use the default 'reg:squarederror' objective or has provided a custom
+    # obj function. Here, assume classification setting when y has 5 or less
+    # unique values:
     if (length(unique(y)) <= 5)
       y <- factor(y)
   }
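The rewrapped comment above describes the label heuristic used for stratified folds; a standalone sketch of it (the `y` values are illustrative only):

```
# With no objective given, a label with 5 or fewer unique values is
# treated as a classification target and stratified as a factor:
y <- c(0, 1, 1, 0, 2, 1)
if (length(unique(y)) <= 5)
  y <- factor(y)
str(y)  # Factor w/ 3 levels
```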
@@ -293,22 +294,22 @@ xgb.createFolds <- function(y, k = 10)
#

#' Deprecation notices.
#'
#' At this time, some of the parameter names were changed in order to make the code style more uniform.
#' The deprecated parameters would be removed in the next release.
#'
#' To see all the current deprecated and new parameters, check the \code{xgboost:::depr_par_lut} table.
#'
#' A deprecation warning is shown when any of the deprecated parameters is used in a call.
#' An additional warning is shown when there was a partial match to a deprecated parameter
#' (as R is able to partially match parameter names).
#'
#' @name xgboost-deprecated
NULL

# Lookup table for the deprecated parameters bookkeeping
depr_par_lut <- matrix(c(
  'print.every.n', 'print_every_n',
  'early.stop.round', 'early_stopping_rounds',
  'training.data', 'data',
  'with.stats', 'with_stats',
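As the notice above says, the full old-to-new mapping lives in the internal lookup table being defined here; one way to inspect it from an R session (an internal, unexported object, so subject to change):

```
lut <- xgboost:::depr_par_lut  # two-column matrix: deprecated name, current name
head(lut)                      # e.g. 'print.every.n' -> 'print_every_n'
```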
@@ -1,12 +1,12 @@
 #' Cross Validation
 #'
 #' The cross validation function of xgboost
 #'
 #' @param params the list of parameters. Commonly used ones are:
 #' \itemize{
 #'   \item \code{objective} objective function, common ones are
 #'   \itemize{
-#'     \item \code{reg:linear} linear regression
+#'     \item \code{reg:squarederror} Regression with squared loss
 #'     \item \code{binary:logistic} logistic regression for classification
 #'   }
 #'   \item \code{eta} step size of each boosting step
@@ -18,12 +18,12 @@
#' See also demo/ for walkthrough example in R.
#' @param data takes an \code{xgb.DMatrix}, \code{matrix}, or \code{dgCMatrix} as the input.
#' @param nrounds the max number of iterations
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label vector of response values. Should be provided only when data is an R-matrix.
#' @param missing is only used when input is a dense matrix. By default is set to NA, which means
#'        that NA values should be considered as 'missing' by the algorithm.
#'        Sometimes, 0 or other extreme value might be used to represent missing values.
#' @param prediction A logical value indicating whether to return the test fold predictions
#'        from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param metrics, list of evaluation metrics to be used in cross validation,
@@ -37,22 +37,22 @@
#'   \item \code{aucpr} Area under PR curve
#'   \item \code{merror} Exact matching error, used to evaluate multi-class classification
#' }
#' @param obj customized objective function. Returns gradient and second order
#'        gradient with given prediction and dtrain.
#' @param feval customized evaluation function. Returns
#'        \code{list(metric='metric-name', value='metric-value')} with given
#'        prediction and dtrain.
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
#'        by the values of outcome labels.
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#'        (each element must be a vector of test fold's indices). When folds are supplied,
#'        the \code{nfold} and \code{stratified} parameters are ignored.
#' @param verbose \code{boolean}, print the statistics during the process
#' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
#'        Default is 1 which means all messages are printed. This parameter is passed to the
#'        \code{\link{cb.print.evaluation}} callback.
#' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
#'        If set to an integer \code{k}, training with a validation set will stop if the performance
#'        doesn't improve for \code{k} rounds.
#'        Setting this parameter engages the \code{\link{cb.early.stop}} callback.
#' @param maximize If \code{feval} and \code{early_stopping_rounds} are set,
@@ -60,46 +60,46 @@
#' When it is \code{TRUE}, it means the larger the evaluation score the better.
#' This parameter is passed to the \code{\link{cb.early.stop}} callback.
#' @param callbacks a list of callback functions to perform various tasks during boosting.
#'        See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
#'        parameters' values. User can provide either existing or their own callback methods in order
#'        to customize the training process.
#' @param ... other parameters to pass to \code{params}.
#'
#' @details
#' The original sample is randomly partitioned into \code{nfold} equal size subsamples.
#'
#' Of the \code{nfold} subsamples, a single subsample is retained as the validation data for testing the model, and the remaining \code{nfold - 1} subsamples are used as training data.
#'
#' The cross-validation process is then repeated \code{nrounds} times, with each of the \code{nfold} subsamples used exactly once as the validation data.
#'
#' All observations are used for both training and validation.
#'
#' Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation}
#'
#' @return
#' An object of class \code{xgb.cv.synchronous} with the following elements:
#' \itemize{
#'   \item \code{call} a function call.
#'   \item \code{params} parameters that were passed to the xgboost library. Note that it does not
#'         capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
#'   \item \code{callbacks} callback functions that were either automatically assigned or
#'         explicitly passed.
#'   \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
#'         first column corresponding to iteration number and the rest corresponding to the
#'         CV-based evaluation means and standard deviations for the training and test CV-sets.
#'         It is created by the \code{\link{cb.evaluation.log}} callback.
#'   \item \code{niter} number of boosting iterations.
#'   \item \code{nfeatures} number of features in training data.
#'   \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
#'         parameter or randomly generated.
#'   \item \code{best_iteration} iteration number with the best evaluation metric value
#'         (only available with early stopping).
#'   \item \code{best_ntreelimit} the \code{ntreelimit} value corresponding to the best iteration,
#'         which could further be used in \code{predict} method
#'         (only available with early stopping).
#'   \item \code{pred} CV prediction values available when \code{prediction} is set.
#'         It is either vector or matrix (see \code{\link{cb.cv.predict}}).
#'   \item \code{models} a list of the CV folds' models. It is only available with the explicit
#'         setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
#' }
#'
@@ -110,32 +110,32 @@
#'              max_depth = 3, eta = 1, objective = "binary:logistic")
#' print(cv)
#' print(cv, verbose=TRUE)
#'
#' @export
xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = NA,
                   prediction = FALSE, showsd = TRUE, metrics=list(),
                   obj = NULL, feval = NULL, stratified = TRUE, folds = NULL,
                   verbose = TRUE, print_every_n=1L,
                   early_stopping_rounds = NULL, maximize = NULL, callbacks = list(), ...) {

  check.deprecation(...)

  params <- check.booster.params(params, ...)
  # TODO: should we deprecate the redundant 'metrics' parameter?
  for (m in metrics)
    params <- c(params, list("eval_metric" = m))

  check.custom.obj()
  check.custom.eval()

  #if (is.null(params[['eval_metric']]) && is.null(feval))
  #  stop("Either 'eval_metric' or 'feval' must be provided for CV")

  # Check the labels
  if ( (inherits(data, 'xgb.DMatrix') && is.null(getinfo(data, 'label'))) ||
       (!inherits(data, 'xgb.DMatrix') && is.null(label)))
    stop("Labels must be provided for CV either through xgb.DMatrix, or through 'label=' when 'data' is matrix")

  # CV folds
  if(!is.null(folds)) {
    if(!is.list(folds) || length(folds) < 2)
@@ -146,7 +146,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
      stop("'nfold' must be > 1")
    folds <- generate.cv.folds(nfold, nrow(data), stratified, label, params)
  }

  # Potential TODO: sequential CV
  #if (strategy == 'sequential')
  #  stop('Sequential CV strategy is not yet implemented')

@@ -166,7 +166,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  stop_condition <- FALSE
  if (!is.null(early_stopping_rounds) &&
      !has.callbacks(callbacks, 'cb.early.stop')) {
    callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
                                                 maximize = maximize, verbose = verbose))
  }
  # CV-predictions callback

@@ -177,7 +177,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  # Sort the callbacks into categories
  cb <- categorize.callbacks(callbacks)


  # create the booster-folds
  dall <- xgb.get.DMatrix(data, label, missing)
  bst_folds <- lapply(seq_along(folds), function(k) {
@@ -197,12 +197,12 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
  # those are fixed for CV (no training continuation)
  begin_iteration <- 1
  end_iteration <- nrounds

  # synchronous CV boosting: run CV folds' models within each iteration
  for (iteration in begin_iteration:end_iteration) {

    for (f in cb$pre_iter) f()

    msg <- lapply(bst_folds, function(fd) {
      xgb.iter.update(fd$bst, fd$dtrain, iteration - 1, obj)
      xgb.iter.eval(fd$bst, fd$watchlist, iteration - 1, feval)

@@ -210,9 +210,9 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
    msg <- simplify2array(msg)
    bst_evaluation <- rowMeans(msg)
    bst_evaluation_err <- sqrt(rowMeans(msg^2) - bst_evaluation^2)

    for (f in cb$post_iter) f()

    if (stop_condition) break
  }
  for (f in cb$finalize) f(finalize = TRUE)
@@ -236,17 +236,17 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =


#' Print xgb.cv result
#'
#' Prints formatted results of \code{xgb.cv}.
#'
#' @param x an \code{xgb.cv.synchronous} object
#' @param verbose whether to print detailed data
#' @param ... passed to \code{data.table.print}
#'
#' @details
#' When not verbose, it would only print the evaluation results,
#' including the best iteration (when available).
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train

@@ -254,13 +254,13 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
#'          eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' print(cv)
#' print(cv, verbose=TRUE)
#'
#' @rdname print.xgb.cv
#' @method print xgb.cv.synchronous
#' @export
print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
  cat('##### xgb.cv ', length(x$folds), '-folds\n', sep = '')

  if (verbose) {
    if (!is.null(x$call)) {
      cat('call:\n  ')
@@ -268,8 +268,8 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
    }
    if (!is.null(x$params)) {
      cat('params (as set within xgb.cv):\n')
      cat( '  ',
           paste(names(x$params),
                 paste0('"', unlist(x$params), '"'),
                 sep = ' = ', collapse = ', '), '\n', sep = '')
    }

@@ -280,9 +280,9 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
        print(x)
      })
    }

    for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
      if (is.null(x[[n]]))
        next
      cat(n, ': ', x[[n]], '\n', sep = '')
    }

@@ -293,10 +293,10 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
    }
  }

  if (verbose)
    cat('evaluation_log:\n')
  print(x$evaluation_log, row.names = FALSE, ...)

  if (!is.null(x$best_iteration)) {
    cat('Best iteration:\n')
    print(x$evaluation_log[x$best_iteration], row.names = FALSE, ...)
@@ -42,7 +42,7 @@
 #' \itemize{
 #'   \item \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. The default objective options are below:
 #'   \itemize{
-#'     \item \code{reg:linear} linear regression (Default).
+#'     \item \code{reg:squarederror} Regression with squared loss (Default).
 #'     \item \code{reg:logistic} logistic regression.
 #'     \item \code{binary:logistic} logistic regression for binary classification. Output probability.
 #'     \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
@@ -16,7 +16,7 @@ xgb.cv(params = list(), data, nrounds, nfold, label = NULL,
 \itemize{
   \item \code{objective} objective function, common ones are
   \itemize{
-    \item \code{reg:linear} linear regression
+    \item \code{reg:squarederror} Regression with squared loss.
     \item \code{binary:logistic} logistic regression for classification
   }
   \item \code{eta} step size of each boosting step
@@ -56,7 +56,7 @@ xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
 \itemize{
   \item \code{objective} specify the learning task and the corresponding learning objective, users can pass a self-defined function to it. The default objective options are below:
   \itemize{
-    \item \code{reg:linear} linear regression (Default).
+    \item \code{reg:squarederror} Regression with squared loss (Default).
     \item \code{reg:logistic} logistic regression.
     \item \code{binary:logistic} logistic regression for binary classification. Output probability.
     \item \code{binary:logitraw} logistic regression for binary classification, output score before logistic transformation.
@@ -210,7 +210,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 watchlist <- list(train = dtrain, eval = dtest)

 ## A simple xgb.train example:
-param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
               objective = "binary:logistic", eval_metric = "auc")
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)

@@ -231,12 +231,12 @@ evalerror <- function(preds, dtrain) {

 # These functions could be used by passing them either:
 #  as 'objective' and 'eval_metric' parameters in the params list:
-param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
               objective = logregobj, eval_metric = evalerror)
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist)

 # or through the ... arguments:
-param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2)
+param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2)
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
                  objective = logregobj, eval_metric = evalerror)

@@ -246,7 +246,7 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,


 ## An xgb.train example of using variable learning rates at each iteration:
-param <- list(max_depth = 2, eta = 1, silent = 1, nthread = 2,
+param <- list(max_depth = 2, eta = 1, verbosity = 0, nthread = 2,
               objective = "binary:logistic", eval_metric = "auc")
 my_etas <- list(eta = c(0.5, 0.1))
 bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
@@ -10,12 +10,12 @@ test_label <- agaricus.test$label[1:100]
test_that("xgb.DMatrix: basic construction", {
  # from sparse matrix
  dtest1 <- xgb.DMatrix(test_data, label=test_label)

  # from dense matrix
  dtest2 <- xgb.DMatrix(as.matrix(test_data), label=test_label)
  expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label'))
  expect_equal(dim(dtest1), dim(dtest2))

  #from dense integer matrix
  int_data <- as.matrix(test_data)
  storage.mode(int_data) <- "integer"
@@ -33,7 +33,7 @@ test_that("xgb.DMatrix: saving, loading", {
  expect_output(dtest3 <- xgb.DMatrix(tmp_file, silent = TRUE), NA)
  unlink(tmp_file)
  expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label'))

  # from a libsvm text file
  tmp <- c("0 1:1 2:1","1 3:1","0 1:1")
  tmp_file <- 'tmp.libsvm'

@@ -49,7 +49,7 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
  expect_true(setinfo(dtest, 'label', test_label))
  labels <- getinfo(dtest, 'label')
  expect_equal(test_label, getinfo(dtest, 'label'))

  expect_true(length(getinfo(dtest, 'weight')) == 0)
  expect_true(length(getinfo(dtest, 'base_margin')) == 0)

@@ -57,10 +57,10 @@ test_that("xgb.DMatrix: getinfo & setinfo", {
  expect_true(setinfo(dtest, 'base_margin', test_label))
  expect_true(setinfo(dtest, 'group', c(50,50)))
  expect_error(setinfo(dtest, 'group', test_label))

  # providing character values will give a warning
  expect_warning( setinfo(dtest, 'weight', rep('a', nrow(test_data))) )

  # any other label should error
  expect_error(setinfo(dtest, 'asdf', test_label))
})
@@ -71,7 +71,7 @@ test_that("xgb.DMatrix: slice, dim", {
  dsub1 <- slice(dtest, 1:42)
  expect_equal(nrow(dsub1), 42)
  expect_equal(ncol(dsub1), ncol(test_data))

  dsub2 <- dtest[1:42,]
  expect_equal(dim(dtest), dim(test_data))
  expect_equal(getinfo(dsub1, 'label'), getinfo(dsub2, 'label'))
@@ -98,7 +98,7 @@ test_that("SHAP contribution values are not NAN", {
   fit <- xgboost(
     verbose = 0,
     params = list(
-      objective = "reg:linear",
+      objective = "reg:squarederror",
       eval_metric = "rmse"),
     data = as.matrix(subset(d, fold == 2)[, ivs]),
     label = subset(d, fold == 2)$y,
@@ -6,9 +6,9 @@ Using XGBoost for regression is very similar to using it for binary classificati
 The dataset we used is the [computer hardware dataset from UCI repository](https://archive.ics.uci.edu/ml/datasets/Computer+Hardware). The demo for regression is almost the same as the [binary classification demo](../binary_classification), except a little difference in general parameter:
 ```
 # General parameter
-# this is the only difference with classification, use reg:linear to do linear regression
+# this is the only difference with classification, use reg:squarederror to do linear regression
 # when labels are in [0,1] we can also use reg:logistic
-objective = reg:linear
+objective = reg:squarederror
 ...

 ```
@@ -1,9 +1,9 @@
 # General Parameters, see comment for each definition
 # choose the tree booster, can also change to gblinear
 booster = gbtree
-# this is the only difference with classification, use reg:linear to do linear classification
+# this is the only difference with classification, use reg:squarederror to do linear classification
 # when labels are in [0,1] we can also use reg:logistic
-objective = reg:linear
+objective = reg:squarederror

 # Tree Booster Parameters
 # step size shrinkage
@@ -1,17 +1,17 @@
 # General Parameters, see comment for each definition
 # choose the tree booster, can also change to gblinear
 booster = gbtree
-# this is the only difference with classification, use reg:linear to do linear classification
+# this is the only difference with classification, use reg:squarederror to do linear classification
 # when labels are in [0,1] we can also use reg:logistic
-objective = reg:linear
+objective = reg:squarederror

 # Tree Booster Parameters
 # step size shrinkage
 eta = 1.0
 # minimum loss reduction required to make a further partition
 gamma = 1.0
 # minimum sum of instance weight(hessian) needed in a child
 min_child_weight = 1
 # maximum depth of a tree
 max_depth = 5

@@ -20,11 +20,10 @@ base_score = 2001
# the number of round to do boosting
num_round = 100
# 0 means do not save any model except the final round model
save_period = 0
# The path of training data
data = "yearpredMSD.libsvm.train"
# The path of validation data, used to monitor training process, here [test] sets name of the validation set
eval[test] = "yearpredMSD.libsvm.test"
# The path of test data
#test:data = "yearpredMSD.libsvm.test"

@@ -92,7 +92,7 @@ Most of the objective functions implemented in XGBoost can be run on GPU. Follo
 +-----------------+-------------+
 | Objectives      | GPU support |
 +-----------------+-------------+
-| reg:linear      | |tick|      |
+| reg:squarederror| |tick|      |
 +-----------------+-------------+
 | reg:logistic    | |tick|      |
 +-----------------+-------------+
@@ -293,9 +293,9 @@ Learning Task Parameters
 ************************
 Specify the learning task and the corresponding learning objective. The objective options are below:

-* ``objective`` [default=reg:linear]
+* ``objective`` [default=reg:squarederror]

-  - ``reg:linear``: linear regression
+  - ``reg:squarederror``: regression with squared loss
   - ``reg:logistic``: logistic regression
   - ``binary:logistic``: logistic regression for binary classification, output probability
   - ``binary:logitraw``: logistic regression for binary classification, output score before logistic transformation
@@ -36,7 +36,7 @@ The following parameters must be set to enable random forest training.


 Other parameters should be set in a similar way they are set for gradient boosting. For
-instance, ``objective`` will typically be ``reg:linear`` for regression and
+instance, ``objective`` will typically be ``reg:squarederror`` for regression and
 ``binary:logistic`` for classification, ``lambda`` should be set according to a desired
 regularization weight, etc.

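A hedged illustration of that advice as an R parameter list (placeholder values, not tuned recommendations):

```
params <- list(
  objective = "reg:squarederror",  # or "binary:logistic" for classification
  lambda = 1                       # L2 regularization weight, set per problem
)
```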
@@ -24,8 +24,8 @@ private[spark] trait LearningTaskParams extends Params {

   /**
    * Specify the learning task and the corresponding learning objective.
-   * options: reg:linear, reg:logistic, binary:logistic, binary:logitraw, count:poisson,
-   * multi:softmax, multi:softprob, rank:pairwise, reg:gamma. default: reg:linear
+   * options: reg:squarederror, reg:logistic, binary:logistic, binary:logitraw, count:poisson,
+   * multi:softmax, multi:softprob, rank:pairwise, reg:gamma. default: reg:squarederror
    */
   final val objective = new Param[String](this, "objective", "objective function used for " +
     s"training, options: {${LearningTaskParams.supportedObjective.mkString(",")}",

@@ -94,12 +94,12 @@ private[spark] trait LearningTaskParams extends Params {

   final def getMaximizeEvaluationMetrics: Boolean = $(maximizeEvaluationMetrics)

-  setDefault(objective -> "reg:linear", baseScore -> 0.5,
+  setDefault(objective -> "reg:squarederror", baseScore -> 0.5,
     trainTestRatio -> 1.0, numEarlyStoppingRounds -> 0)
 }

 private[spark] object LearningTaskParams {
-  val supportedObjective = HashSet("reg:linear", "reg:logistic", "binary:logistic",
+  val supportedObjective = HashSet("reg:squarederror", "reg:logistic", "binary:logistic",
     "binary:logitraw", "count:poisson", "multi:softmax", "multi:softprob", "rank:pairwise",
     "rank:ndcg", "rank:map", "reg:gamma", "reg:tweedie")

@@ -96,7 +96,7 @@ class PersistenceSuite extends FunSuite with PerTest with BeforeAndAfterAll {
   val testDM = new DMatrix(Regression.test.iterator)

   val paramMap = Map("eta" -> "0.1", "max_depth" -> "6", "silent" -> "1",
-    "objective" -> "reg:linear", "num_round" -> "10", "num_workers" -> numWorkers)
+    "objective" -> "reg:squarederror", "num_round" -> "10", "num_workers" -> numWorkers)
   val xgbr = new XGBoostRegressor(paramMap)
   val xgbrPath = new File(tempDir, "xgbr").getPath
   xgbr.write.overwrite().save(xgbrPath)
@@ -36,7 +36,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
     "eta" -> "1",
     "max_depth" -> "6",
     "silent" -> "1",
-    "objective" -> "reg:linear")
+    "objective" -> "reg:squarederror")

   val model1 = ScalaXGBoost.train(trainingDM, paramMap, round)
   val prediction1 = model1.predict(testDM)

@@ -69,7 +69,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
     "eta" -> "1",
     "max_depth" -> "6",
     "silent" -> "1",
-    "objective" -> "reg:linear",
+    "objective" -> "reg:squarederror",
     "num_round" -> round,
     "num_workers" -> numWorkers)

@@ -80,7 +80,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
     .setEta(1)
     .setMaxDepth(6)
     .setSilent(1)
-    .setObjective("reg:linear")
+    .setObjective("reg:squarederror")
     .setNumRound(round)
     .setNumWorkers(numWorkers)
     .fit(trainingDF)
@ -108,7 +108,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
|
|||||||
|
|
||||||
test("use weight") {
|
test("use weight") {
|
||||||
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
||||||
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
|
"objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
|
||||||
|
|
||||||
val getWeightFromId = udf({id: Int => if (id == 0) 1.0f else 0.001f}, DataTypes.FloatType)
|
val getWeightFromId = udf({id: Int => if (id == 0) 1.0f else 0.001f}, DataTypes.FloatType)
|
||||||
val trainingDF = buildDataFrame(Regression.train)
|
val trainingDF = buildDataFrame(Regression.train)
|
||||||
@ -123,7 +123,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
|
|||||||
|
|
||||||
test("test predictionLeaf") {
|
test("test predictionLeaf") {
|
||||||
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
||||||
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
|
"objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
|
||||||
val training = buildDataFrame(Regression.train)
|
val training = buildDataFrame(Regression.train)
|
||||||
val testDF = buildDataFrame(Regression.test)
|
val testDF = buildDataFrame(Regression.test)
|
||||||
val groundTruth = testDF.count()
|
val groundTruth = testDF.count()
|
||||||
@ -137,7 +137,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
|
|||||||
|
|
||||||
test("test predictionLeaf with empty column name") {
|
test("test predictionLeaf with empty column name") {
|
||||||
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
||||||
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
|
"objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
|
||||||
val training = buildDataFrame(Regression.train)
|
val training = buildDataFrame(Regression.train)
|
||||||
val testDF = buildDataFrame(Regression.test)
|
val testDF = buildDataFrame(Regression.test)
|
||||||
val xgb = new XGBoostRegressor(paramMap)
|
val xgb = new XGBoostRegressor(paramMap)
|
||||||
@ -149,7 +149,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
|
|||||||
|
|
||||||
test("test predictionContrib") {
|
test("test predictionContrib") {
|
||||||
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
||||||
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
|
"objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
|
||||||
val training = buildDataFrame(Regression.train)
|
val training = buildDataFrame(Regression.train)
|
||||||
val testDF = buildDataFrame(Regression.test)
|
val testDF = buildDataFrame(Regression.test)
|
||||||
val groundTruth = testDF.count()
|
val groundTruth = testDF.count()
|
||||||
@ -163,7 +163,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
|
|||||||
|
|
||||||
test("test predictionContrib with empty column name") {
|
test("test predictionContrib with empty column name") {
|
||||||
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
||||||
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
|
"objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
|
||||||
val training = buildDataFrame(Regression.train)
|
val training = buildDataFrame(Regression.train)
|
||||||
val testDF = buildDataFrame(Regression.test)
|
val testDF = buildDataFrame(Regression.test)
|
||||||
val xgb = new XGBoostRegressor(paramMap)
|
val xgb = new XGBoostRegressor(paramMap)
|
||||||
@ -175,7 +175,7 @@ class XGBoostRegressorSuite extends FunSuite with PerTest {
|
|||||||
|
|
||||||
test("test predictionLeaf and predictionContrib") {
|
test("test predictionLeaf and predictionContrib") {
|
||||||
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
val paramMap = Map("eta" -> "1", "max_depth" -> "6", "silent" -> "1",
|
||||||
"objective" -> "reg:linear", "num_round" -> 5, "num_workers" -> numWorkers)
|
"objective" -> "reg:squarederror", "num_round" -> 5, "num_workers" -> numWorkers)
|
||||||
val training = buildDataFrame(Regression.train)
|
val training = buildDataFrame(Regression.train)
|
||||||
val testDF = buildDataFrame(Regression.test)
|
val testDF = buildDataFrame(Regression.test)
|
||||||
val groundTruth = testDF.count()
|
val groundTruth = testDF.count()
|
||||||
|
|||||||
@@ -128,8 +128,8 @@ class RegLossObj : public ObjFunction {
 // register the objective functions
 DMLC_REGISTER_PARAMETER(RegLossParam);

-XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear")
-.describe("Linear regression.")
+XGBOOST_REGISTER_OBJECTIVE(SquaredLossRegression, "reg:squarederror")
+.describe("Regression with squared error.")
 .set_body([]() { return new RegLossObj<LinearSquareLoss>(); });

 XGBOOST_REGISTER_OBJECTIVE(LogisticRegression, "reg:logistic")
@@ -145,7 +145,13 @@ XGBOOST_REGISTER_OBJECTIVE(LogisticRaw, "binary:logitraw")
 "before logistic transformation.")
 .set_body([]() { return new RegLossObj<LogisticRaw>(); });

-// Deprecated GPU functions
+// Deprecated functions
+XGBOOST_REGISTER_OBJECTIVE(LinearRegression, "reg:linear")
+.describe("Regression with squared error.")
+.set_body([]() {
+LOG(WARNING) << "reg:linear is now deprecated in favor of reg:squarederror.";
+return new RegLossObj<LinearSquareLoss>(); });
+
 XGBOOST_REGISTER_OBJECTIVE(GPULinearRegression, "gpu:reg:linear")
 .describe("Deprecated. Linear regression (computed on GPU).")
 .set_body([]() {
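Because the old name stays registered with a warning in its body, existing scripts keep training unchanged. A sketch of what a caller should now see, assuming nothing in the pipeline suppresses the log:

    import numpy as np
    import xgboost as xgb

    X, y = np.random.randn(50, 4), np.random.randn(50)
    dtrain = xgb.DMatrix(X, label=y)
    # Still trains, but the booster logs:
    #   WARNING: reg:linear is now deprecated in favor of reg:squarederror.
    bst = xgb.train({'objective': 'reg:linear'}, dtrain, num_boost_round=2)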
@@ -1,4 +1,5 @@
 // Copyright by Contributors
+#include <gtest/gtest.h>
 #include <xgboost/objective.h>

 #include "../helpers.h"
@@ -6,7 +7,7 @@
 TEST(Objective, UnknownFunction) {
 xgboost::ObjFunction* obj = nullptr;
 EXPECT_ANY_THROW(obj = xgboost::ObjFunction::Create("unknown_name"));
-EXPECT_NO_THROW(obj = xgboost::ObjFunction::Create("reg:linear"));
+EXPECT_NO_THROW(obj = xgboost::ObjFunction::Create("reg:squarederror"));
 if (obj) {
 delete obj;
 }
@@ -1,12 +1,13 @@
 /*!
-* Copyright 2017-2018 XGBoost contributors
+* Copyright 2017-2019 XGBoost contributors
 */
+#include <gtest/gtest.h>
 #include <xgboost/objective.h>

 #include "../helpers.h"

 TEST(Objective, DeclareUnifiedTest(LinearRegressionGPair)) {
-xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:linear");
+xgboost::ObjFunction * obj = xgboost::ObjFunction::Create("reg:squarederror");
 std::vector<std::pair<std::string, std::string> > args;
 obj->Configure(args);
 CheckObjFunction(obj,
@@ -132,15 +132,16 @@ def run_suite(param, num_rounds=10, select_datasets=None, scale_features=False):
 Run the given parameters on a range of datasets. Objective and eval metric will be automatically set
 """
 datasets = [
-Dataset("Boston", get_boston, "reg:linear", "rmse"),
+Dataset("Boston", get_boston, "reg:squarederror", "rmse"),
 Dataset("Digits", get_digits, "multi:softmax", "merror"),
 Dataset("Cancer", get_cancer, "binary:logistic", "error"),
-Dataset("Sparse regression", get_sparse, "reg:linear", "rmse"),
+Dataset("Sparse regression", get_sparse, "reg:squarederror", "rmse"),
 Dataset("Sparse regression with weights", get_sparse_weights,
-"reg:linear", "rmse", has_weights=True),
+"reg:squarederror", "rmse", has_weights=True),
 Dataset("Small weights regression", get_small_weights,
-"reg:linear", "rmse", has_weights=True),
+"reg:squarederror", "rmse", has_weights=True),
-Dataset("Boston External Memory", get_boston, "reg:linear", "rmse",
+Dataset("Boston External Memory", get_boston,
+"reg:squarederror", "rmse",
 use_external_memory=True)
 ]
@@ -38,7 +38,7 @@ class TestBasic(unittest.TestCase):
 def test_basic(self):
 dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
 dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
-param = {'max_depth': 2, 'eta': 1, 'silent': 1,
+param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic'}
 # specify validations set to watch performance
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
@@ -85,7 +85,7 @@ class TestBasic(unittest.TestCase):
 def test_record_results(self):
 dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
 dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
-param = {'max_depth': 2, 'eta': 1, 'silent': 1,
+param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic'}
 # specify validations set to watch performance
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
@@ -102,7 +102,7 @@ class TestBasic(unittest.TestCase):
 def test_multiclass(self):
 dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
 dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
-param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'num_class': 2}
+param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
 # specify validations set to watch performance
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 2
@@ -273,7 +273,7 @@ class TestBasic(unittest.TestCase):

 def test_cv(self):
 dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
-params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic'}

 # return np.ndarray
@@ -283,7 +283,7 @@ class TestBasic(unittest.TestCase):

 def test_cv_no_shuffle(self):
 dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
-params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic'}

 # return np.ndarray
@@ -294,7 +294,7 @@ class TestBasic(unittest.TestCase):

 def test_cv_explicit_fold_indices(self):
 dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
-params = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective':
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
 'binary:logistic'}
 folds = [
 # Train Test
@@ -310,7 +310,7 @@ class TestBasic(unittest.TestCase):

 def test_cv_explicit_fold_indices_labels(self):
 params = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'objective':
-'reg:linear'}
+'reg:squarederror'}
 N = 100
 F = 3
 dm = xgb.DMatrix(data=np.random.randn(N, F), label=np.arange(N))
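The Python tests above also migrate from the deprecated boolean-style 'silent' flag to the graded 'verbosity' parameter (0 = silent, 1 = warnings, 2 = info, 3 = debug). A sketch of the one-line translation:

    import xgboost as xgb

    # old, deprecated spelling:  {'max_depth': 2, 'eta': 1, 'silent': 1, ...}
    # new equivalent; verbosity=0 suppresses messages the way silent=1 did:
    params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
              'objective': 'binary:logistic'}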
@@ -11,7 +11,7 @@ rng = np.random.RandomState(1994)

 class TestModels(unittest.TestCase):
 def test_glm(self):
-param = {'silent': 1, 'objective': 'binary:logistic',
+param = {'verbosity': 0, 'objective': 'binary:logistic',
 'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1, 'nthread': 1}
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 4
@@ -26,7 +26,7 @@ class TestModels(unittest.TestCase):
 def test_dart(self):
 dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
 dtest = xgb.DMatrix(dpath + 'agaricus.txt.test')
-param = {'max_depth': 5, 'objective': 'binary:logistic', 'booster': 'dart', 'silent': False}
+param = {'max_depth': 5, 'objective': 'binary:logistic', 'booster': 'dart', 'verbosity': 1}
 # specify validations set to watch performance
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 2
@@ -51,7 +51,7 @@ class TestModels(unittest.TestCase):

 # check whether sample_type and normalize_type work
 num_round = 50
-param['silent'] = True
+param['verbosity'] = 0
 param['learning_rate'] = 0.1
 param['rate_drop'] = 0.1
 preds_list = []
@@ -74,7 +74,8 @@ class TestModels(unittest.TestCase):

 # learning_rates as a list
 # init eta with 0 to check whether learning_rates work
-param = {'max_depth': 2, 'eta': 0, 'silent': 1, 'objective': 'binary:logistic'}
+param = {'max_depth': 2, 'eta': 0, 'verbosity': 0,
+'objective': 'binary:logistic'}
 evals_result = {}
 bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.8, 0.7, 0.6, 0.5],
 evals_result=evals_result)
@@ -84,7 +85,8 @@ class TestModels(unittest.TestCase):
 assert eval_errors[0] > eval_errors[-1]

 # init learning_rate with 0 to check whether learning_rates work
-param = {'max_depth': 2, 'learning_rate': 0, 'silent': 1, 'objective': 'binary:logistic'}
+param = {'max_depth': 2, 'learning_rate': 0, 'verbosity': 0,
+'objective': 'binary:logistic'}
 evals_result = {}
 bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0.8, 0.7, 0.6, 0.5],
 evals_result=evals_result)
@@ -94,7 +96,7 @@ class TestModels(unittest.TestCase):
 assert eval_errors[0] > eval_errors[-1]

 # check if learning_rates override default value of eta/learning_rate
-param = {'max_depth': 2, 'silent': 1, 'objective': 'binary:logistic'}
+param = {'max_depth': 2, 'verbosity': 0, 'objective': 'binary:logistic'}
 evals_result = {}
 bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=[0, 0, 0, 0],
 evals_result=evals_result)
@@ -111,7 +113,7 @@ class TestModels(unittest.TestCase):
 assert isinstance(bst, xgb.core.Booster)

 def test_custom_objective(self):
-param = {'max_depth': 2, 'eta': 1, 'silent': 1}
+param = {'max_depth': 2, 'eta': 1, 'verbosity': 0}
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
 num_round = 2

@@ -152,7 +154,8 @@ class TestModels(unittest.TestCase):

 def test_multi_eval_metric(self):
 watchlist = [(dtest, 'eval'), (dtrain, 'train')]
-param = {'max_depth': 2, 'eta': 0.2, 'silent': 1, 'objective': 'binary:logistic'}
+param = {'max_depth': 2, 'eta': 0.2, 'verbosity': 0,
+'objective': 'binary:logistic'}
 param['eval_metric'] = ["auc", "logloss", 'error']
 evals_result = {}
 bst = xgb.train(param, dtrain, 4, watchlist, evals_result=evals_result)
@@ -161,7 +164,7 @@ class TestModels(unittest.TestCase):
 assert set(evals_result['eval'].keys()) == {'auc', 'error', 'logloss'}

 def test_fpreproc(self):
-param = {'max_depth': 2, 'eta': 1, 'silent': 1,
+param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic'}
 num_round = 2

@@ -175,7 +178,7 @@ class TestModels(unittest.TestCase):
 metrics={'auc'}, seed=0, fpreproc=fpreproc)

 def test_show_stdv(self):
-param = {'max_depth': 2, 'eta': 1, 'silent': 1,
+param = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic'}
 num_round = 2
 xgb.cv(param, dtrain, num_round, nfold=5,
@@ -52,7 +52,7 @@ class TestEarlyStopping(unittest.TestCase):
 X = digits['data']
 y = digits['target']
 dm = xgb.DMatrix(X, label=y)
-params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic'}

 cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
@@ -9,25 +9,25 @@ rng = np.random.RandomState(1337)

 class TestEvalMetrics(unittest.TestCase):
 xgb_params_01 = {
-'silent': 1,
+'verbosity': 0,
 'nthread': 1,
 'eval_metric': 'error'
 }

 xgb_params_02 = {
-'silent': 1,
+'verbosity': 0,
 'nthread': 1,
 'eval_metric': ['error']
 }

 xgb_params_03 = {
-'silent': 1,
+'verbosity': 0,
 'nthread': 1,
 'eval_metric': ['rmse', 'error']
 }

 xgb_params_04 = {
-'silent': 1,
+'verbosity': 0,
 'nthread': 1,
 'eval_metric': ['error', 'rmse']
 }
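'eval_metric' accepts either a single metric name or a list, which is why the four parameter blocks above exercise both spellings. A small sketch over the public API, with 'dpath' standing in for the tests' data directory:

    import xgboost as xgb

    dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')  # dpath as in the tests
    params = {'verbosity': 0, 'nthread': 1,
              'objective': 'binary:logistic',
              'eval_metric': ['rmse', 'error']}  # list form: both metrics reported
    bst = xgb.train(params, dtrain, num_boost_round=2,
                    evals=[(dtrain, 'train')])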
@@ -18,7 +18,7 @@ class TestInteractionConstraints(unittest.TestCase):
 X = np.column_stack((x1, x2, x3))
 dtrain = xgboost.DMatrix(X, label=y)

-params = {'max_depth': 3, 'eta': 0.1, 'nthread': 2, 'silent': 1,
+params = {'max_depth': 3, 'eta': 0.1, 'nthread': 2, 'verbosity': 0,
 'interaction_constraints': '[[0, 1]]'}
 num_boost_round = 100
 # Fit a model that only allows interaction between x1 and x2
@@ -30,7 +30,7 @@ def xgb_get_weights(bst):

 def assert_regression_result(results, tol):
 regression_results = [r for r in results if
-r["param"]["objective"] == "reg:linear"]
+r["param"]["objective"] == "reg:squarederror"]
 for res in regression_results:
 X = scale(res["dataset"].X,
 with_mean=isinstance(res["dataset"].X, np.ndarray))
@@ -52,7 +52,7 @@ def assert_regression_result(results, tol):
 # TODO: More robust classification tests
 def assert_classification_result(results):
 classification_results = [r for r in results if
-r["param"]["objective"] != "reg:linear"]
+r["param"]["objective"] != "reg:squarederror"]
 for res in classification_results:
 # Check accuracy is reasonable
 assert res["eval"][-1] < 0.5, (res["dataset"].name, res["eval"][-1])
@@ -16,7 +16,8 @@ class TestTreesToDataFrame(unittest.TestCase):

 def build_model(self, max_depth, num_round):
 dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')
-param = {'max_depth': max_depth, 'objective': 'binary:logistic', 'silent': False}
+param = {'max_depth': max_depth, 'objective': 'binary:logistic',
+'verbosity': 1}
 num_round = num_round
 bst = xgb.train(param, dtrain, num_round)
 return bst
@@ -51,7 +51,7 @@ class TestSHAP(unittest.TestCase):

 def fn(max_depth, num_rounds):
 # train
-params = {'max_depth': max_depth, 'eta': 1, 'silent': 1}
+params = {'max_depth': max_depth, 'eta': 1, 'verbosity': 0}
 bst = xgb.train(params, dtrain, num_boost_round=num_rounds)

 # predict
@@ -4,7 +4,7 @@ from scipy.sparse import rand

 rng = np.random.RandomState(1)

-param = {'max_depth': 3, 'objective': 'binary:logistic', 'silent': 1}
+param = {'max_depth': 3, 'objective': 'binary:logistic', 'verbosity': 0}


 def test_sparse_dmatrix_csr():
@@ -11,18 +11,18 @@ class TestTrainingContinuation(unittest.TestCase):
 num_parallel_tree = 3

 xgb_params_01 = {
-'silent': 1,
+'verbosity': 0,
 'nthread': 1,
 }

 xgb_params_02 = {
-'silent': 1,
+'verbosity': 0,
 'nthread': 1,
 'num_parallel_tree': num_parallel_tree
 }

 xgb_params_03 = {
-'silent': 1,
+'verbosity': 0,
 'nthread': 1,
 'num_class': 5,
 'num_parallel_tree': num_parallel_tree
@@ -10,7 +10,8 @@ train_data = xgb.DMatrix(np.array([[1]]), label=np.array([1]))
 class TestTreeRegularization(unittest.TestCase):
 def test_alpha(self):
 params = {
-'tree_method': 'exact', 'silent': 1, 'objective': 'reg:linear',
+'tree_method': 'exact', 'verbosity': 0,
+'objective': 'reg:squarederror',
 'eta': 1,
 'lambda': 0,
 'alpha': 0.1
@@ -27,7 +28,8 @@ class TestTreeRegularization(unittest.TestCase):

 def test_lambda(self):
 params = {
-'tree_method': 'exact', 'silent': 1, 'objective': 'reg:linear',
+'tree_method': 'exact', 'verbosity': 0,
+'objective': 'reg:squarederror',
 'eta': 1,
 'lambda': 1,
 'alpha': 0
@@ -44,7 +46,8 @@ class TestTreeRegularization(unittest.TestCase):

 def test_alpha_and_lambda(self):
 params = {
-'tree_method': 'exact', 'silent': 1, 'objective': 'reg:linear',
+'tree_method': 'exact', 'verbosity': 1,
+'objective': 'reg:squarederror',
 'eta': 1,
 'lambda': 1,
 'alpha': 0.1
@@ -33,7 +33,7 @@ class TestUpdaters(unittest.TestCase):
 'max_bin': [2, 256],
 'grow_policy': ['depthwise', 'lossguide'],
 'max_leaves': [64, 0],
-'silent': [1]}
+'verbosity': [0]}
 for param in parameter_combinations(variable_param):
 result = run_suite(param)
 assert_results_non_increasing(result, 1e-2)
@@ -45,7 +45,7 @@ class TestUpdaters(unittest.TestCase):
 ag_param = {'max_depth': 2,
 'tree_method': 'hist',
 'eta': 1,
-'silent': 1,
+'verbosity': 0,
 'objective': 'binary:logistic',
 'eval_metric': 'auc'}
 hist_res = {}
@@ -120,7 +120,7 @@ class TestPandas(unittest.TestCase):

 def test_cv_as_pandas(self):
 dm = xgb.DMatrix(dpath + 'agaricus.txt.train')
-params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic'}

 cv = xgb.cv(params, dm, num_boost_round=10, nfold=10)
@@ -143,19 +143,19 @@ class TestPandas(unittest.TestCase):
 u'train-error-mean', u'train-error-std'])
 assert cv.columns.equals(exp)

-params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic', 'eval_metric': 'auc'}
 cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
 assert 'eval_metric' in params
 assert 'auc' in cv.columns[0]

-params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic', 'eval_metric': ['auc']}
 cv = xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=True)
 assert 'eval_metric' in params
 assert 'auc' in cv.columns[0]

-params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic', 'eval_metric': ['auc']}
 cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
 as_pandas=True, early_stopping_rounds=1)
@@ -163,19 +163,19 @@ class TestPandas(unittest.TestCase):
 assert 'auc' in cv.columns[0]
 assert cv.shape[0] < 10

-params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic'}
 cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
 as_pandas=True, metrics='auc')
 assert 'auc' in cv.columns[0]

-params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic'}
 cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
 as_pandas=True, metrics=['auc'])
 assert 'auc' in cv.columns[0]

-params = {'max_depth': 2, 'eta': 1, 'silent': 1,
+params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
 'objective': 'binary:logistic', 'eval_metric': ['auc']}
 cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
 as_pandas=True, metrics='error')
@@ -603,7 +603,8 @@ def test_RFECV():
 # Regression
 X, y = load_boston(return_X_y=True)
 bst = xgb.XGBClassifier(booster='gblinear', learning_rate=0.1,
-n_estimators=10, n_jobs=1, objective='reg:linear',
+n_estimators=10, n_jobs=1,
+objective='reg:squarederror',
 random_state=0, verbosity=0)
 rfecv = RFECV(
 estimator=bst, step=1, cv=3, scoring='neg_mean_squared_error')
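The scikit-learn wrapper accepts the objective string as a constructor argument, as the RFECV test above shows. A minimal regression-flavored sketch along the same lines:

    import xgboost as xgb
    from sklearn.datasets import load_boston

    X, y = load_boston(return_X_y=True)
    reg = xgb.XGBRegressor(objective='reg:squarederror',
                           n_estimators=10, verbosity=0)
    reg.fit(X, y)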