style cleanup, incomplete CV

2014-09-05 20:34:41 -07:00 · 2014-09-05 20:34:41 -07:00 · 984102e586
commit 984102e586
parent 2b170ecda4
4 changed files with 148 additions and 69 deletions
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@ -81,20 +81,28 @@ xgb.predict <- function(booster, dmat, outputmargin = FALSE) {
 ## ----the following are low-level iterative functions; you can ignore
 ## them if you do not need them --------------------------------------
-
+# get dmatrix from data, label
-# iteratively update booster with dtrain
+xgb.get.DMatrix <- function(data, label = NULL) {
-xgb.iter.update <- function(booster, dtrain, iter) {
+  inClass <- class(data)
-  if (class(booster) != "xgb.Booster") {
+  if (inClass == "dgCMatrix" || inClass == "matrix") {
-    stop("xgb.iter.update: first argument must be type xgb.Booster")
+    if (is.null(label)) {
      stop("xgboost: need label when data is a matrix")
    }
    dtrain <- xgb.DMatrix(data, label = label)
  } else {
    if (!is.null(label)) {
      warning("xgboost: label will be ignored.")
    }
    if (inClass == "character") {
      dtrain <- xgb.DMatrix(data)
    } else if (inClass == "xgb.DMatrix") {
      dtrain <- data
    } else {
      stop("xgboost: Invalid input of data")
    }
  }
-  if (class(dtrain) != "xgb.DMatrix") {
+  return (dtrain)
    stop("xgb.iter.update: second argument must be type xgb.DMatrix")
  }
  .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, 
        PACKAGE = "xgboost")
  return(TRUE)
 }
 # iteratively update booster with customized statistics
 xgb.iter.boost <- function(booster, dtrain, gpair) {
  if (class(booster) != "xgb.Booster") {
@ -108,8 +116,28 @@ xgb.iter.boost <- function(booster, dtrain, gpair) {
  return(TRUE)
 }
 # Run one boosting iteration on `booster` with the training data `dtrain`.
 #
 # booster: object of class "xgb.Booster"
 # dtrain:  object of class "xgb.DMatrix"
 # iter:    iteration index; coerced to integer for the native call
 # obj:     optional customized objective. When supplied it is called as
 #          obj(pred, dtrain) and its result is handed to xgb.iter.boost.
 #          When NULL, the native XGBoosterUpdateOneIter_R routine is called.
 #
 # Stops if either of the first two arguments has the wrong class.
 # Returns TRUE.
 xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
  if (!inherits(booster, "xgb.Booster")) {
    stop("xgb.iter.update: first argument must be type xgb.Booster")
  }
  if (!inherits(dtrain, "xgb.DMatrix")) {
    stop("xgb.iter.update: second argument must be type xgb.DMatrix")
  }
  if (is.null(obj)) {
    .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, 
          PACKAGE = "xgboost")
  } else {
    # Use the `booster` argument here: the previous code referenced `bst`,
    # which is not defined inside this function.
    pred <- xgb.predict(booster, dtrain)
    gpair <- obj(pred, dtrain)
    xgb.iter.boost(booster, dtrain, gpair)
  }
  return(TRUE)
 }
 # iteratively evaluate one iteration
-xgb.iter.eval <- function(booster, watchlist, iter) {
+xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
  if (class(booster) != "xgb.Booster") {
    stop("xgb.eval: first argument must be type xgb.Booster")
  }
@ -122,18 +150,47 @@ xgb.iter.eval <- function(booster, watchlist, iter) {
    }
  }
  if (length(watchlist) != 0) {
-    evnames <- list()
+    if (is.null(feval)) {
-    for (i in 1:length(watchlist)) {
+      evnames <- list()
-      w <- watchlist[i]
+      for (i in 1:length(watchlist)) {
-      if (length(names(w)) == 0) {
+        w <- watchlist[i]
-        stop("xgb.eval: name tag must be presented for every elements in watchlist")
+        if (length(names(w)) == 0) {
          stop("xgb.eval: name tag must be presented for every elements in watchlist")
        }
        evnames <- append(evnames, names(w))
      }
      msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, 
                   evnames, PACKAGE = "xgboost")
    } else {
      msg <- paste("[", iter, "]", sep="")
      for (j in 1:length(watchlist)) {
        w <- watchlist[j]
        if (length(names(w)) == 0) {
          stop("xgb.eval: name tag must be presented for every elements in watchlist")
        }
        ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
        msg <- paste(msg, "\t", names(w), "-", ret$metric, ":", ret$value, sep="")
      }
      evnames <- append(evnames, names(w))
    }
    msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, 
                 evnames, PACKAGE = "xgboost")
  } else {
    msg <- ""
  }  
  return(msg)
 } 
 #------------------------------------------
 # helper functions for cross validation
 #
 # Split the rows of `dall` into `nfold` random folds (work in progress).
 #
 # dall:     data set whose row count is obtained through xgb.numrow
 # nfold:    number of folds
 # param, metrics, fpreproc: accepted but not used yet
 #
 # NOTE(review): the per-fold construction is still missing, so the function
 # currently builds the fold index sets and then returns NULL (the value of
 # the trailing empty loop).
 xgb.cv.mknfold <- function(dall, nfold, param, metrics=list(), fpreproc = NULL) {  
  # Shuffle every row index once.
  randidx <- sample(seq_len(xgb.numrow(dall)))
  # Deal the shuffled indices to the folds in turn. Each fold is kept as ONE
  # list element (appending a bare vector to a list would flatten it into
  # single indices), every row lands in exactly one fold, and fold sizes
  # differ by at most one even when nfold does not divide the row count.
  foldid <- factor((seq_along(randidx) - 1) %% nfold + 1, levels = seq_len(nfold))
  idset <- unname(split(randidx, foldid))
  ret <- list()
  # TODO: build the entry for fold k from idset[[k]] and store it in ret.
  for (k in seq_len(nfold)) {
  }
 }
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@ -0,0 +1,57 @@
 #' Cross Validation for eXtreme Gradient Boosting
 #' 
 #' The cross validation function of xgboost
 #'
 #' @param params the list of parameters. Commonly used ones are:
 #' \itemize{
 #'   \item \code{objective} objective function, common ones are
 #'   \itemize{
 #'     \item \code{reg:linear} linear regression
 #'     \item \code{binary:logistic} logistic regression for classification
 #'   }
 #'   \item \code{eta} step size of each boosting step
 #'   \item \code{max_depth} maximum depth of the tree
 #'   \item \code{nthread} number of threads used in training, if not set, all threads are used
 #' }
 #'
 #'   See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for 
 #'   further details. See also inst/examples/demo.R for walkthrough example in R.
 #' @param data the input data. It is converted with \code{xgb.get.DMatrix},
 #'   so it may be a \code{matrix} or \code{dgCMatrix} (\code{label} is then
 #'   required), a character string handed to \code{xgb.DMatrix}, or an
 #'   \code{xgb.DMatrix}.
 #' @param nrounds the max number of iterations
 #' @param metrics list of evaluation metrics to be used in cross validation,
 #'   when it is not specified, the evaluation metric is chosen according to objective function.
 #'   Possible options are:
 #' \itemize{
 #'   \item \code{error} binary classification error rate
 #'   \item \code{rmse} Root mean square error
 #'   \item \code{logloss} negative log-likelihood function
 #'   \item \code{auc} Area under curve
 #'   \item \code{merror} Exact matching error, used to evaluate multi-class classification
 #' }
 #'
 #' @param label the label of the data. Required when \code{data} is a
 #'   \code{matrix} or \code{dgCMatrix}; ignored with a warning otherwise.
 #' @param obj customized objective function. Returns gradient and second order 
 #'   gradient with given prediction and dtrain.
 #' @param feval customized evaluation function. Returns 
 #'   \code{list(metric='metric-name', value='metric-value')} with given 
 #'   prediction and dtrain.
 #' @param ... other parameters to pass to \code{params}.
 #' 
 #' @details 
 #' This is the cross validation function for xgboost
 #'
 #' Parallelization is automatically enabled if OpenMP is present.
 #' Number of threads can also be manually specified via "nthread" parameter.
 #' 
 #' The implementation is not complete yet: the function currently checks
 #' \code{params}, converts \code{data} and merges \code{...} into
 #' \code{params}. \code{nrounds}, \code{metrics}, \code{obj} and
 #' \code{feval} are not used so far.
 #'
 #' @export
 #'
 xgb.cv <- function(params=list(), data, nrounds, metrics=list(), label = NULL,
                    obj = NULL, feval = NULL, ...) {
  if (typeof(params) != "list") {
    stop("xgb.cv: first argument params must be list")
  }
  # Accept the same input kinds as xgboost() by normalising to an xgb.DMatrix.
  dtrain <- xgb.get.DMatrix(data, label)
  # Extra named arguments are treated as additional booster parameters.
  params <- append(params, list(...))
 }
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@ -16,7 +16,7 @@
 #'
 #'   See \url{https://github.com/tqchen/xgboost/wiki/Parameters} for 
 #'   further details. See also inst/examples/demo.R for walkthrough example in R.
-#' @param dtrain takes an \code{xgb.DMatrix} as the input.
+#' @param data takes an \code{xgb.DMatrix} as the input.
 #' @param nrounds the max number of iterations
 #' @param watchlist what information should be printed when \code{verbose=1} or
 #'   \code{verbose=2}. Watchlist is used to specify validation set monitoring
@ -64,8 +64,9 @@
 #' bst <- xgb.train(param, dtrain, nround = 2, watchlist, logregobj, evalerror)
 #' @export
 #' 
-xgb.train <- function(params=list(), dtrain, nrounds, watchlist = list(), 
+xgb.train <- function(params=list(), data, nrounds, watchlist = list(), 
                      obj = NULL, feval = NULL, ...) {
  dtrain <- data
  if (typeof(params) != "list") {
    stop("xgb.train: first argument params must be list")
  }
@ -75,37 +76,10 @@ xgb.train <- function(params=list(), dtrain, nrounds, watchlist = list(),
  params = append(params, list(...))
  bst <- xgb.Booster(params, append(watchlist, dtrain))
  for (i in 1:nrounds) {
-    if (is.null(obj)) {
+    succ <- xgb.iter.update(bst, dtrain, i - 1, obj)
      succ <- xgb.iter.update(bst, dtrain, i - 1)
    } else {
      pred <- xgb.predict(bst, dtrain)
      gpair <- obj(pred, dtrain)
      succ <- xgb.iter.boost(bst, dtrain, gpair)
    }
    if (length(watchlist) != 0) {
-      if (is.null(feval)) {
+      msg <- xgb.iter.eval(bst, watchlist, i - 1, feval)
-        msg <- xgb.iter.eval(bst, watchlist, i - 1)
+      cat(paste(msg, "\n", sep=""))
        cat(msg)
        cat("\n")
      } else {
        cat("[")
        cat(i)
        cat("]")
        for (j in 1:length(watchlist)) {
          w <- watchlist[j]
          if (length(names(w)) == 0) {
            stop("xgb.eval: name tag must be presented for every elements in watchlist")
          }
          ret <- feval(xgb.predict(bst, w[[1]]), w[[1]])
          cat("\t")
          cat(names(w))
          cat("-")
          cat(ret$metric)
          cat(":")
          cat(ret$value)
        }
        cat("\n")
      }
    }
  }
  return(bst)
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@ -40,19 +40,7 @@
 #' 
 xgboost <- function(data = NULL, label = NULL, params = list(), nrounds, 
                    verbose = 1, ...) {
-  inClass <- class(data)
+  dtrain <- xgb.get.DMatrix(data, label)
  if (inClass == "dgCMatrix" || inClass == "matrix") {
    if (is.null(label)) 
      stop("xgboost: need label when data is a matrix")
    dtrain <- xgb.DMatrix(data, label = label)
  } else {
    if (!is.null(label)) 
      warning("xgboost: label will be ignored.")
    if (inClass == "character") 
      dtrain <- xgb.DMatrix(data) else if (inClass == "xgb.DMatrix") 
      dtrain <- data else stop("xgboost: Invalid input of data")
  }
  if (verbose > 1) {
    silent <- 0 
  } else {
@ -62,8 +50,11 @@ xgboost <- function(data = NULL, label = NULL, params = list(), nrounds,
  params <- append(params, list(silent = silent))
  params <- append(params, list(...))
-  if (verbose > 0) 
+  if (verbose > 0) {
-    watchlist <- list(train = dtrain) else watchlist <- list()
+    watchlist <- list(train = dtrain)
  } else {
    watchlist <- list()
  }
  bst <- xgb.train(params, dtrain, nrounds, watchlist)