From 419e4dbda6ac69d7c905663dcecc3d18800ed31f Mon Sep 17 00:00:00 2001 From: hetong007 Date: Wed, 6 May 2015 15:14:29 -0700 Subject: [PATCH] add demo for early_stopping in R --- R-package/R/xgb.train.R | 56 ++++++++++++++++----------------- R-package/demo/early_Stopping.R | 23 +------------- 2 files changed, 29 insertions(+), 50 deletions(-) diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 636ad3cad..01de306a0 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -139,31 +139,34 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(), params = append(params, list(...)) # Early stopping - if (!is.null(feval) && is.null(maximize) && !is.null(earlyStopRound)) - stop('Please set maximize to note whether the model is maximizing the evaluation or not.') - if (length(watchlist) == 0 && !is.null(earlyStopRound)) - stop('For early stopping you need at least one set in watchlist.') - if (is.null(maximize) && is.null(params$eval_metric) && !is.null(earlyStopRound)) - stop('Please set maximize to note whether the model is maximizing the evaluation or not.') - if (is.null(maximize)) - { - if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) { - maximize = FALSE - } else { - maximize = TRUE + if (!is.null(earlyStopRound)){ + if (!is.null(feval) && is.null(maximize)) + stop('Please set maximize to note whether the model is maximizing the evaluation or not.') + if (length(watchlist) == 0) + stop('For early stopping you need at least one set in watchlist.') + if (is.null(maximize) && is.null(params$eval_metric)) + stop('Please set maximize to note whether the model is maximizing the evaluation or not.') + if (is.null(maximize)) + { + if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) { + maximize = FALSE + } else { + maximize = TRUE + } } + + if (maximize) { + bestScore = 0 + } else { + bestScore = Inf + } + bestInd = 0 + earlyStopflag = FALSE + + if (length(watchlist)>1) + warning('Only the first data set in watchlist is used for early stopping process.') } - if (maximize) { - bestScore = 0 - } else { - bestScore = Inf - } - bestInd = 0 - earlyStopflag = FALSE - - if (length(watchlist)>1 && !is.null(earlyStopRound)) - warning('Only the first data set in watchlist is used for early stopping process.') handle <- xgb.Booster(params, append(watchlist, dtrain)) bst <- xgb.handleToBooster(handle) @@ -174,8 +177,7 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(), cat(paste(msg, "\n", sep="")) if (!is.null(earlyStopRound)) { - score = strsplit(msg,'\\s+')[[1]][1] - score = strsplit(score,':')[[1]][2] + score = strsplit(msg,':|\\s+')[[1]][3] score = as.numeric(score) if ((maximize && score>bestScore) || (!maximize && scoreearlyStopRound) { earlyStopflag = TRUE + cat('Stopping. Best iteration:',bestInd) + break } } } } - if (earlyStopflag) { - cat('Stopping. Best iteration:',bestInd) - break - } } bst <- xgb.Booster.check(bst) if (!is.null(earlyStopRound)) { diff --git a/R-package/demo/early_Stopping.R b/R-package/demo/early_Stopping.R index 3253c3828..4cab385ca 100644 --- a/R-package/demo/early_Stopping.R +++ b/R-package/demo/early_Stopping.R @@ -30,29 +30,8 @@ evalerror <- function(preds, dtrain) { err <- as.numeric(sum(labels != (preds > 0)))/length(labels) return(list(metric = "error", value = err)) } -print ('start training with user customized objective') +print ('start training with early Stopping setting') # training with customized objective, we can also do step by step training # simply look at xgboost.py's implementation of train bst <- xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror, maximize = FALSE, earlyStopRound = 3) -# -# there can be cases where you want additional information -# being considered besides the property of DMatrix you can get by getinfo -# you can set additional information as attributes if DMatrix -# set label attribute of dtrain to be label, we use label as an example, it can be anything -attr(dtrain, 'label') <- getinfo(dtrain, 'label') -# this is new customized objective, where you can access things you set -# same thing applies to customized evaluation function -logregobjattr <- function(preds, dtrain) { - # now you can access the attribute in customized function - labels <- attr(dtrain, 'label') - preds <- 1/(1 + exp(-preds)) - grad <- preds - labels - hess <- preds * (1 - preds) - return(list(grad = grad, hess = hess)) -} -print ('start training with user customized objective, with additional attributes in DMatrix') -# training with customized objective, we can also do step by step training -# simply look at xgboost.py's implementation of train -bst <- xgb.train(param, dtrain, num_round, watchlist, logregobjattr, evalerror, maximize = FALSE, - earlyStopRound = 3) \ No newline at end of file