From 419e4dbda6ac69d7c905663dcecc3d18800ed31f Mon Sep 17 00:00:00 2001
From: hetong007 <hetong007@gmail.com>
Date: Wed, 6 May 2015 15:14:29 -0700
Subject: [PATCH] add demo for early_stopping in R; run early-stopping checks in xgb.train only when earlyStopRound is set

---
 R-package/R/xgb.train.R         | 56 ++++++++++++++++-----------------
 R-package/demo/early_Stopping.R | 23 +-------------
 2 files changed, 29 insertions(+), 50 deletions(-)

diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index 636ad3cad..01de306a0 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -139,31 +139,34 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
   params = append(params, list(...))
   
   # Early stopping
-  if (!is.null(feval) && is.null(maximize) && !is.null(earlyStopRound))
-    stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
-  if (length(watchlist) == 0 && !is.null(earlyStopRound))
-    stop('For early stopping you need at least one set in watchlist.')
-  if (is.null(maximize) && is.null(params$eval_metric) && !is.null(earlyStopRound))
-    stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
-  if (is.null(maximize))
-  {
-    if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
-      maximize = FALSE
-    } else {
-      maximize = TRUE
+  if (!is.null(earlyStopRound)){
+    if (!is.null(feval) && is.null(maximize))
+      stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
+    if (length(watchlist) == 0)
+      stop('For early stopping you need at least one set in watchlist.')
+    if (is.null(maximize) && is.null(params$eval_metric))
+      stop('Please set maximize to note whether the model is maximizing the evaluation or not.')
+    if (is.null(maximize))
+    {
+      if (params$eval_metric %in% c('rmse','logloss','error','merror','mlogloss')) {
+        maximize = FALSE
+      } else {
+        maximize = TRUE
+      }
     }
+    
+    if (maximize) {
+      bestScore = 0
+    } else {
+      bestScore = Inf
+    }
+    bestInd = 0
+    earlyStopflag = FALSE
+    
+    if (length(watchlist)>1)
+      warning('Only the first data set in watchlist is used for early stopping process.')
   }
   
-  if (maximize) {
-    bestScore = 0
-  } else {
-    bestScore = Inf
-  }
-  bestInd = 0
-  earlyStopflag = FALSE
-  
-  if (length(watchlist)>1 && !is.null(earlyStopRound))
-    warning('Only the first data set in watchlist is used for early stopping process.')
   
   handle <- xgb.Booster(params, append(watchlist, dtrain))
   bst <- xgb.handleToBooster(handle)
@@ -174,8 +177,7 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
       cat(paste(msg, "\n", sep=""))
       if (!is.null(earlyStopRound))
       {
-        score = strsplit(msg,'\\s+')[[1]][1]
-        score = strsplit(score,':')[[1]][2]
+        score = strsplit(msg,':|\\s+')[[1]][3]
         score = as.numeric(score)
         if ((maximize && score>bestScore) || (!maximize && score<bestScore)) {
           bestScore = score
@@ -183,14 +185,12 @@ xgb.train <- function(params=list(), data, nrounds, watchlist = list(),
         } else {
           if (i-bestInd>earlyStopRound) {
             earlyStopflag = TRUE
+            cat('Stopping. Best iteration:',bestInd)
+            break
           }
         }
       }
     }
-    if (earlyStopflag) {
-      cat('Stopping. Best iteration:',bestInd)
-      break
-    }
   }
   bst <- xgb.Booster.check(bst)
   if (!is.null(earlyStopRound)) {
diff --git a/R-package/demo/early_Stopping.R b/R-package/demo/early_Stopping.R
index 3253c3828..4cab385ca 100644
--- a/R-package/demo/early_Stopping.R
+++ b/R-package/demo/early_Stopping.R
@@ -30,29 +30,8 @@ evalerror <- function(preds, dtrain) {
   err <- as.numeric(sum(labels != (preds > 0)))/length(labels)
   return(list(metric = "error", value = err))
 }
-print ('start training with user customized objective')
+print ('start training with early Stopping setting')
 # training with customized objective, we can also do step by step training
 # simply look at xgboost.py's implementation of train
 bst <- xgb.train(param, dtrain, num_round, watchlist, logregobj, evalerror, maximize = FALSE,
                  earlyStopRound = 3)
-#
-# there can be cases where you want additional information
-# being considered besides the property of DMatrix you can get by getinfo
-# you can set additional information as attributes if DMatrix
-# set label attribute of dtrain to be label, we use label as an example, it can be anything
-attr(dtrain, 'label') <- getinfo(dtrain, 'label')
-# this is new customized objective, where you can access things you set
-# same thing applies to customized evaluation function
-logregobjattr <- function(preds, dtrain) {
-  # now you can access the attribute in customized function
-  labels <- attr(dtrain, 'label')
-  preds <- 1/(1 + exp(-preds))
-  grad <- preds - labels
-  hess <- preds * (1 - preds)
-  return(list(grad = grad, hess = hess))
-}
-print ('start training with user customized objective, with additional attributes in DMatrix')
-# training with customized objective, we can also do step by step training
-# simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist, logregobjattr, evalerror, maximize = FALSE,
-                 earlyStopRound = 3)
\ No newline at end of file