diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index 6f6c1a900..636ad3cad 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -139,11 +139,11 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
     params = append(params, list(...))
 
     # Early stopping
-    if (!is.null(feval) && is.null(maximize))
+    if (!is.null(feval) && is.null(maximize) && !is.null(earlyStopRound))
         stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
     if (length(watchlist) == 0 && !is.null(earlyStopRound))
         stop('For early stopping you need at least one set in watchlist.')
-    if (is.null(maximize) && is.null(params$eval_metric))
+    if (is.null(maximize) && is.null(params$eval_metric) && !is.null(earlyStopRound))
         stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
 
     if (is.null(maximize)) {
diff --git a/R-package/demo/00Index b/R-package/demo/00Index
index 969da0d91..f0b41ec2a 100644
--- a/R-package/demo/00Index
+++ b/R-package/demo/00Index
@@ -6,3 +6,4 @@ generalized_linear_model Generalized Linear Model
 cross_validation         Cross validation
 create_sparse_matrix     Create Sparse Matrix
 predict_leaf_indices     Predicting the corresponding leaves
+early_Stopping           Early stopping in training
diff --git a/R-package/demo/early_Stopping.R b/R-package/demo/early_Stopping.R
new file mode 100644
index 000000000..3253c3828
--- /dev/null
+++ b/R-package/demo/early_Stopping.R
@@ -0,0 +1,58 @@
+require(xgboost)
+# load in the agaricus dataset
+data(agaricus.train, package = 'xgboost')
+data(agaricus.test, package = 'xgboost')
+dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+# note: for a customized objective function, we leave objective as default
+# note: what we then get in prediction is the margin value
+# you must know what you are doing
+param <- list(max.depth = 2, eta = 1, nthread = 2, silent = 1)
+watchlist <- list(eval = dtest)
+num_round <- 20
+# user-defined objective function: given predictions, return the gradient and second-order gradient
+# this is the log-likelihood loss
+logregobj <- function(preds, dtrain) {
+  labels <- getinfo(dtrain, "label")
+  preds <- 1 / (1 + exp(-preds))
+  grad <- preds - labels
+  hess <- preds * (1 - preds)
+  return(list(grad = grad, hess = hess))
+}
+# user-defined evaluation function: returns a pair (metric_name, result)
+# NOTE: with a customized objective function, the prediction passed in is the margin
+# this may keep the built-in evaluation metrics from working properly
+# for example, with logistic loss the prediction is the score before the logistic transformation,
+# while the built-in error metric assumes its input has already been transformed
+# keep this in mind when customizing: you may need to write your own evaluation function
+evalerror <- function(preds, dtrain) {
+  labels <- getinfo(dtrain, "label")
+  err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
+  return(list(metric = "error", value = err))
+}
+print('start training with user customized objective')
+# training with a customized objective; we can also do step-by-step training
+# simply look at the implementation of xgb.train
+bst <- xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror, maximize = FALSE,
+                 earlyStopRound = 3)
+#
+# there can be cases where you want additional information
+# beyond the properties of the DMatrix that getinfo provides
+# you can set such information as attributes of the DMatrix
+# here we set the label attribute of dtrain; label is just an example, it can be anything
+attr(dtrain, 'label') <- getinfo(dtrain, 'label')
+# this is the new customized objective, where you can access the attributes you set
+# the same applies to a customized evaluation function
+logregobjattr <- function(preds, dtrain) {
+  # now you can access the attribute inside the customized function
+  labels <- attr(dtrain, 'label')
+  preds <- 1 / (1 + exp(-preds))
+  grad <- preds - labels
+  hess <- preds * (1 - preds)
+  return(list(grad = grad, hess = hess))
+}
+print('start training with user customized objective, with additional attributes in DMatrix')
+# training with a customized objective; we can also do step-by-step training
+# simply look at the implementation of xgb.train
+bst <- xgb.train(param, dtrain, num_round, watchlist, logregobjattr, evalerror, maximize = FALSE,
+                 earlyStopRound = 3)
\ No newline at end of file
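
Not part of the diff: a minimal R sketch of the behaviour the xgb.train.R hunk changes, reusing the param, dtrain, num_round, watchlist, logregobj and evalerror objects defined in the demo above. With the patched checks, the maximize validation is gated on earlyStopRound, so a custom feval without early stopping no longer stops with an error; supplying maximize is still required once early stopping is actually requested with a custom feval.

# sketch, assuming the demo objects above are in scope
# custom feval, no early stopping: maximize may now be omitted
# (before this patch, this call stopped with an error)
bst <- xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror)
# custom feval with early stopping: maximize must still be given
# (FALSE here, since the error metric is to be minimized)
bst <- xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror,
                 maximize = FALSE, earlyStopRound = 3)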