[R] add parameter deprecation related utilities; code style

2016-06-27 01:49:51 -05:00 · 2016-06-27 01:49:51 -05:00 · c342614a81
commit c342614a81
parent 76650c096f
1 changed files with 97 additions and 31 deletions
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@ -27,7 +27,7 @@ NVL <- function(x, val) {
 # Merges booster params with whatever is provided in ...
 # plus runs some checks
-check.params <- function(params, ...) {
+check.booster.params <- function(params, ...) {
  if (typeof(params) != "list") 
    stop("params must be a list")
@ -35,25 +35,26 @@ check.params <- function(params, ...) {
  names(params) <- gsub("\\.", "_", names(params))
  # merge parameters from the params and the dots-expansion
-  dot.params <- list(...)
+  dot_params <- list(...)
-  names(dot.params) <- gsub("\\.", "_", names(dot.params))
+  names(dot_params) <- gsub("\\.", "_", names(dot_params))
-  if (length(intersect(names(params), names(dot.params))) > 0)
+  if (length(intersect(names(params),
                       names(dot_params))) > 0)
    stop("Same parameters in 'params' and in the call are not allowed. Please check your 'params' list.")
-  params <- c(params, dot.params)
+  params <- c(params, dot_params)
-  # only multiple eval_metric's make sense
+  # providing a parameter multiple times only makes sense for 'eval_metric'
-  name.freqs <- table(names(params))
+  name_freqs <- table(names(params))
-  multi.names <- setdiff( names(name.freqs[name.freqs>1]), 'eval_metric')
+  multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric')
-  if (length(multi.names) > 0) {
+  if (length(multi_names) > 0) {
-    warning("The following parameters (other than 'eval_metric') were provided multiple times:\n\t",
+    warning("The following parameters were provided multiple times:\n\t",
-            paste(multi.names, collapse=', '), "\n  Only the last value for each of them will be used.\n")
+            paste(multi_names, collapse=', '), "\n  Only the last value for each of them will be used.\n")
    # While xgboost itself would choose the last value for a multi-parameter, 
    # will do some clean-up here b/c multi-parameters could be used further in R code, and R would 
    # pick the 1st (not the last) value when multiple elements with the same name are present in a list.
-    for (n in multi.names) {
+    for (n in multi_names) {
-      del.idx <- which(n == names(params))
+      del_idx <- which(n == names(params))
-      del.idx <- del.idx[-length(del.idx)]
+      del_idx <- del_idx[-length(del_idx)]
-      params[[del.idx]] <- NULL
+      params[[del_idx]] <- NULL
    }
  }
@ -99,7 +100,7 @@ check.custom.eval <- function(env = parent.frame()) {
  if (!is.null(env$feval) && is.null(env$maximize))
    stop("Please set 'maximize' to indicate whether the metric needs to be maximized or not")
-  # handle the situation when custom eval function was provided through params
+  # handle a situation when custom eval function was provided through params
  if (!is.null(env$params[['eval_metric']]) &&
      typeof(env$params$eval_metric) == 'closure') {
    env$feval <- env$params$eval_metric
@ -136,7 +137,8 @@ xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
 # with the names in a 'datasetname-metricname' format.
 xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
  if (class(booster) != "xgb.Booster.handle")
-    stop("first argument must be type xgb.Booster.handle")
+    stop("first argument type must be xgb.Booster.handle")
  if (length(watchlist) == 0) 
    return(NULL)
@ -171,15 +173,15 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
  # cannot do it for rank
  if (exists('objective', where=params) &&
      is.character(params$objective) &&
-      strtrim(params$objective, 5) == 'rank:')
+      strtrim(params$objective, 5) == 'rank:') {
    stop("\n\tAutomatic generation of CV-folds is not implemented for ranking!\n",
         "\tConsider providing pre-computed CV-folds through the 'folds=' parameter.\n")
-  
+  }
  # shuffle
-  rnd.idx <- sample(1:nrows)
+  rnd_idx <- sample(1:nrows)
  if (stratified &&
-      length(label) == length(rnd.idx)) {
+      length(label) == length(rnd_idx)) {
-    y <- label[rnd.idx]
+    y <- label[rnd_idx]
    # WARNING: some heuristic logic is employed to identify classification setting!
    #  - For classification, need to convert y labels to factor before making the folds,
    #    and then do stratification by factor levels.
@ -200,13 +202,13 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
    folds <- xgb.createFolds(y, nfold)
  } else {
    # make simple non-stratified folds
-    kstep <- length(rnd.idx) %/% nfold
+    kstep <- length(rnd_idx) %/% nfold
    folds <- list()
    for (i in 1:(nfold - 1)) {
-      folds[[i]] <- rnd.idx[1:kstep]
+      folds[[i]] <- rnd_idx[1:kstep]
-      rnd.idx <- rnd.idx[-(1:kstep)]
+      rnd_idx <- rnd_idx[-(1:kstep)]
    }
-    folds[[nfold]] <- rnd.idx
+    folds[[nfold]] <- rnd_idx
  }
  return(folds)
 }
@ -255,9 +257,73 @@ xgb.createFolds <- function(y, k = 10)
      ## shuffle the integers for fold assignment and assign to this classes's data
      foldVector[which(y == dimnames(numInClass)$y[i])] <- sample(seqVector)
    }
-  } else foldVector <- seq(along = y)
+  } else {
    foldVector <- seq(along = y)
  }
  out <- split(seq(along = y), foldVector)
  names(out) <- NULL
  out
 }
 #
 # Deprectaion notice utilities ------------------------------------------------
 #
 #' Deprecation notices.
 #' 
 #' At this time, some of the parameter names were changed in order to make the code style more uniform.
 #' The deprecated parameters would be removed in the next release.
 #' 
 #' To see all the current deprecated and new parameters, check the \code{xgboost:::depr_par_lut} table.
 #' 
 #' A deprecation warning is shown when any of the deprecated parameters is used in a call. 
 #' An additional warning is shown when there was a partial match to a deprecated parameter 
 #' (as R is able to partially match parameter names).
 #' 
 #' @name xgboost-deprecated
 NULL
 # Lookup table for the deprecated parameters bookkeeping
 depr_par_lut <- matrix(c(
  'print.every.n', 'print_every_n', 
  'early.stop.round', 'early_stopping_rounds',
  'training.data', 'data',
  'with.stats', 'with_stats',
  'numberOfClusters', 'n_clusters',
  'features.keep', 'features_keep',
  'plot.height','plot_height',
  'plot.width','plot_width',
  'dummy', 'DUMMY'
 ), ncol=2, byrow = TRUE)
 colnames(depr_par_lut) <- c('old', 'new')
 # Checks the dot-parameters for deprecated names
 # (including partial matching), gives a deprecation warning,
 # and sets new parameters to the old parameters' values within its parent frame.
 # WARNING: has side-effects
 check.deprecation <- function(..., env = parent.frame()) {
  pars <- list(...)
  # exact and partial matches
  all_match <- pmatch(names(pars), depr_par_lut[,1])
  # indices of matched pars' names
  idx_pars <- which(!is.na(all_match))
  if (length(idx_pars) == 0) return()
  # indices of matched LUT rows
  idx_lut <- all_match[idx_pars]
  # which of idx_lut were the exact matches?
  ex_match <- depr_par_lut[idx_lut,1] %in% names(pars)
  for (i in seq_along(idx_pars)) {
    pars_par <- names(pars)[idx_pars[i]]
    old_par <- depr_par_lut[idx_lut[i], 1]
    new_par <- depr_par_lut[idx_lut[i], 2]
    if (!ex_match[i]) {
      warning("'", pars_par, "' was partially matched to '", old_par,"'")
    }
    .Deprecated(new_par, old=old_par, package = 'xgboost')
    if (new_par != 'NULL') {
      eval(parse(text = paste(new_par, '<-', pars[[pars_par]])), envir = env)
    }
  }
 }