[R-package] various fixes for R CMD check (#1328)

* [R] fix xgb.create.features
* [R] fixes for R CMD check

commit 11efa038bd (parent f8d23b97be)
@@ -20,6 +20,7 @@ BugReports: https://github.com/dmlc/xgboost/issues
 VignetteBuilder: knitr
 Suggests:
     knitr,
     rmarkdown,
     ggplot2 (>= 1.0.1),
     DiagrammeR (>= 0.8.1),
     Ckmeans.1d.dp (>= 3.3.1),
@@ -7,6 +7,9 @@ S3method(dimnames,xgb.DMatrix)
 S3method(getinfo,xgb.DMatrix)
 S3method(predict,xgb.Booster)
 S3method(predict,xgb.Booster.handle)
+S3method(print,xgb.Booster)
+S3method(print,xgb.DMatrix)
+S3method(print,xgb.cv.synchronous)
 S3method(setinfo,xgb.DMatrix)
 S3method(slice,xgb.DMatrix)
 export("xgb.attr<-")
@@ -19,9 +22,6 @@ export(cb.print.evaluation)
 export(cb.reset.parameters)
 export(cb.save.model)
 export(getinfo)
-export(print.xgb.Booster)
-export(print.xgb.DMatrix)
-export(print.xgb.cv.synchronous)
 export(setinfo)
 export(slice)
 export(xgb.DMatrix)
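Together, these two NAMESPACE hunks convert the three print methods from plain `export()` entries into `S3method()` registrations, which is what `R CMD check` expects for S3 methods. With roxygen2 this comes from tagging the method; a minimal sketch with a hypothetical class `myclass` (not part of this commit):

```r
# Hypothetical roxygen2 pattern: @method together with @export makes
# roxygen2 emit S3method(print, myclass) in NAMESPACE instead of
# export(print.myclass).

#' Print a myclass object
#'
#' @param x a myclass object
#' @param ... not used
#' @method print myclass
#' @export
print.myclass <- function(x, ...) {
  cat("myclass object with", length(x), "element(s)\n")
  invisible(x)
}
```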
@@ -55,10 +55,14 @@ importFrom(data.table,data.table)
 importFrom(data.table,rbindlist)
 importFrom(data.table,setnames)
 importFrom(magrittr,"%>%")
+importFrom(stats,predict)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
 importFrom(stringr,str_match)
 importFrom(stringr,str_replace)
 importFrom(stringr,str_replace_all)
 importFrom(stringr,str_split)
+importFrom(utils,object.size)
+importFrom(utils,str)
+importFrom(utils,tail)
 useDynLib(xgboost)
@@ -178,7 +178,7 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) {
 #' @rdname predict.xgb.Booster
 #' @export
 predict.xgb.Booster <- function(object, newdata, missing = NA,
-        outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE) {
+        outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) {

   object <- xgb.Booster.check(object, saveraw = FALSE)
   if (class(newdata) != "xgb.DMatrix")
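The extra `...` satisfies `R CMD check`'s S3 generic/method consistency test: because `stats::predict` is declared as `predict(object, ...)`, every method must also accept `...`. A toy illustration with a hypothetical class (an assumption, not xgboost code):

```r
# stats::predict is predict(object, ...), so a method's formals must end
# with ... after its own named arguments, or R CMD check warns.
predict.toyclass <- function(object, newdata, missing = NA, ...) {
  # stub: a real method would score newdata here
  rep(0, NROW(newdata))
}
```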
@@ -245,7 +245,7 @@ predict.xgb.Booster.handle <- function(object, ...) {
 #' and its serialization is handled extrnally.
 #' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
 #' change the value of that parameter for a model.
-#' Use \code{\link{`xgb.parameters<-`}} to set or change model parameters.
+#' Use \code{\link{xgb.parameters<-}} to set or change model parameters.
 #'
 #' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
 #' than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.
@@ -414,6 +414,7 @@ xgb.ntree <- function(bst) {
 #' print(bst)
 #' print(bst, verbose=TRUE)
 #'
+#' @method print xgb.Booster
 #' @export
 print.xgb.Booster <- function(x, verbose=FALSE, ...) {
   cat('##### xgb.Booster\n')
@@ -186,7 +186,7 @@ getinfo <- function(object, ...) UseMethod("getinfo")

 #' @rdname getinfo
 #' @export
-getinfo.xgb.DMatrix <- function(object, name) {
+getinfo.xgb.DMatrix <- function(object, name, ...) {
   if (typeof(name) != "character" ||
       length(name) != 1 ||
       !name %in% c('label', 'weight', 'base_margin', 'nrow')) {
@@ -237,7 +237,7 @@ setinfo <- function(object, ...) UseMethod("setinfo")

 #' @rdname setinfo
 #' @export
-setinfo.xgb.DMatrix <- function(object, name, info) {
+setinfo.xgb.DMatrix <- function(object, name, info, ...) {
   if (name == "label") {
     if (length(info) != nrow(object))
       stop("The length of labels must equal to the number of rows in the input data")
@@ -341,6 +341,8 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
 #'
 #' dtrain
+#' print(dtrain, verbose=TRUE)
 #'
+#' @method print xgb.DMatrix
 #' @export
 print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
   cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ')
@@ -14,7 +14,7 @@
 #' \strong{Practical Lessons from Predicting Clicks on Ads at Facebook}
 #'
 #' \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
-#' Joaquin Quiñonero Candela)}
+#' Joaquin Quinonero Candela)}
 #'
 #' International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
 #'
@@ -22,7 +22,7 @@
 #'
 #' Extract explaining the method:
 #'
-#' "\emph{We found that boosted decision trees are a powerful and very
+#' "We found that boosted decision trees are a powerful and very
 #' convenient way to implement non-linear and tuple transformations
 #' of the kind we just described. We treat each individual
 #' tree as a categorical feature that takes as value the
@@ -43,7 +43,7 @@
 #' based transformation as a supervised feature encoding that
 #' converts a real-valued vector into a compact binary-valued
 #' vector. A traversal from root node to a leaf node represents
-#' a rule on certain features.}"
+#' a rule on certain features."
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
@@ -78,12 +78,7 @@
 #' @export
 xgb.create.features <- function(model, data, ...){
   check.deprecation(...)
-  pred_with_leaf = predict(model, data, predleaf = TRUE)
-  cols <- list()
-  for(i in 1:length(trees)){
-    # max is not the real max but it s not important for the purpose of adding features
-    leaf_id <- sort(unique(pred_with_leaf[,i]))
-    cols[[i]] <- factor(x = pred_with_leaf[,i], level = leaf_id)
-  }
-  cBind(data, sparse.model.matrix( ~ . -1, as.data.frame(cols)))
+  pred_with_leaf <- predict(model, data, predleaf = TRUE)
+  cols <- lapply(as.data.frame(pred_with_leaf), factor)
+  cBind(data, sparse.model.matrix( ~ . -1, cols))
 }
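The old loop indexed an undefined `trees` object (the bug this commit fixes) and rebuilt each factor by hand; the replacement converts every column of the leaf-index matrix in one `lapply()`. A self-contained sketch of that encoding, with a toy matrix standing in for `predict(..., predleaf = TRUE)` output:

```r
library(Matrix)

# Toy stand-in for predict(model, data, predleaf = TRUE):
# one column of leaf indices per boosted tree.
pred_with_leaf <- matrix(c(1, 3, 3, 2, 5, 2), nrow = 3,
                         dimnames = list(NULL, c("tree1", "tree2")))

# Each column becomes a factor, so every distinct leaf is a level ...
cols <- lapply(as.data.frame(pred_with_leaf), factor)

# ... and ~ . - 1 one-hot encodes each level as its own binary column.
sparse.model.matrix(~ . - 1, data = as.data.frame(cols))
```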
@@ -171,7 +171,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
   # CV-predictions callback
   if (prediction &&
       !has.callbacks(callbacks, 'cb.cv.predict')) {
-    callbacks <- add.cb(callbacks, cb.cv.predict(save_model=FALSE))
+    callbacks <- add.cb(callbacks, cb.cv.predict(save_models=FALSE))
   }
   # Sort the callbacks into categories
   cb <- categorize.callbacks(callbacks)
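The callback's real argument is `save_models` (plural); the old call passed a nonexistent `save_model`. A hedged usage sketch, assuming a `dtrain` DMatrix with binary labels already exists:

```r
# Assumes dtrain is an existing xgb.DMatrix with 0/1 labels.
cv <- xgb.cv(params = list(objective = "binary:logistic", max_depth = 2),
             data = dtrain, nrounds = 5, nfold = 3, prediction = TRUE,
             callbacks = list(cb.cv.predict(save_models = TRUE)))
```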
@@ -253,6 +253,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
 #' print(cv, verbose=TRUE)
 #'
 #' @rdname print.xgb.cv
+#' @method print xgb.cv.synchronous
 #' @export
 print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) {
   cat('##### xgb.cv ', length(x$folds), '-folds\n', sep='')
@@ -103,4 +103,4 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
-globalVariables(c(".", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover"))
+globalVariables(c(".", ".N", "Gain", "Frequency", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover"))
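`utils::globalVariables()` is how the package silences `R CMD check`'s "no visible binding for global variable" NOTEs for column names that data.table evaluates non-standardly; the lists in this commit simply grow to cover newly flagged names. The pattern as it appears inside a package's R/ sources (hypothetical names, data.table assumed imported):

```r
# Declare NSE column names once so R CMD check stops emitting
# "no visible binding for global variable" NOTEs about them.
utils::globalVariables(c(".", ".N", "Feature", "Gain"))

count_by_feature <- function(dt) {
  # Feature and .N would otherwise look like undefined globals here.
  dt[, .N, by = Feature]
}
```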
@@ -2,6 +2,7 @@
 #'
 #' Plot multiple graph aligned by rows and columns.
 #'
 #' @param ... the plots
+#' @param cols number of columns
 #' @return NULL
 multiplot <- function(..., cols = 1) {
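`R CMD check` reports "Undocumented arguments in documentation object" when a formal such as `cols` has no `@param` tag, which is why the tag is added here and the generated `\item` appears in the Rd hunk further down. The shape of the fix, sketched on a hypothetical stub:

```r
# Sketch: R CMD check warns about undocumented arguments unless every
# formal has a matching @param tag in the roxygen block.

#' Lay out several plots in a grid
#'
#' @param ... the plots
#' @param cols number of columns
#' @return NULL
multiplot_stub <- function(..., cols = 1) {
  # stub only; the real multiplot arranges grid viewports
  invisible(NULL)
}
```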
@@ -40,7 +41,7 @@ edge.parser <- function(element) {
 }

 #' Extract path from root to leaf from data.table
-#' @param dt.tree data.table containing the nodes and edges of the trees
+#' @param dt_tree data.table containing the nodes and edges of the trees
 get.paths.to.leaf <- function(dt_tree) {
   dt.not.leaf.edges <-
     dt_tree[Feature != "Leaf",.(ID, Yes, Tree)] %>% list(dt_tree[Feature != "Leaf",.(ID, No, Tree)]) %>% rbindlist(use.names = F)
@@ -149,6 +150,6 @@ xgb.plot.deepness <- function(model = NULL) {
 # They are mainly column names inferred by Data.table...
 globalVariables(
   c(
-    "Feature", "Count", "ggplot", "aes", "geom_bar", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "ID", "Yes", "No", "Tree"
+    ".N", "N", "size", "Feature", "Count", "ggplot", "aes", "geom_bar", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "ID", "Yes", "No", "Tree"
   )
 )
@@ -103,6 +103,6 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,

 globalVariables(
   c(
-    "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
+    ".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
   )
 )
@@ -95,6 +95,8 @@ NULL
 #' @importFrom stringr str_replace
 #' @importFrom stringr str_replace_all
 #' @importFrom stringr str_split
+#' @importFrom utils object.size str tail
+#' @importFrom stats predict
 #'
 #' @import methods
 #' @useDynLib xgboost
@@ -7,7 +7,7 @@
 get.paths.to.leaf(dt_tree)
 }
 \arguments{
-\item{dt.tree}{data.table containing the nodes and edges of the trees}
+\item{dt_tree}{data.table containing the nodes and edges of the trees}
 }
 \description{
 Extract path from root to leaf from data.table
@@ -7,7 +7,7 @@
 \usage{
 getinfo(object, ...)

-\method{getinfo}{xgb.DMatrix}(object, name)
+\method{getinfo}{xgb.DMatrix}(object, name, ...)
 }
 \arguments{
 \item{object}{Object of class \code{xgb.DMatrix}}
@@ -7,6 +7,8 @@
 multiplot(..., cols = 1)
 }
 \arguments{
 \item{...}{the plots}
+
+\item{cols}{number of columns}
 }
 \description{
@@ -7,7 +7,7 @@
 \usage{
 \method{predict}{xgb.Booster}(object, newdata, missing = NA,
   outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE,
-  reshape = FALSE)
+  reshape = FALSE, ...)

 \method{predict}{xgb.Booster.handle}(object, ...)
 }
@@ -4,7 +4,7 @@
 \alias{print.xgb.Booster}
 \title{Print xgb.Booster}
 \usage{
-print.xgb.Booster(x, verbose = FALSE, ...)
+\method{print}{xgb.Booster}(x, verbose = FALSE, ...)
 }
 \arguments{
 \item{x}{an xgb.Booster object}
@@ -4,7 +4,7 @@
 \alias{print.xgb.DMatrix}
 \title{Print xgb.DMatrix}
 \usage{
-print.xgb.DMatrix(x, verbose = FALSE, ...)
+\method{print}{xgb.DMatrix}(x, verbose = FALSE, ...)
 }
 \arguments{
 \item{x}{an xgb.DMatrix object}
@@ -24,5 +24,6 @@ dtrain <- xgb.DMatrix(train$data, label=train$label)

 dtrain
+print(dtrain, verbose=TRUE)

 }
@@ -4,7 +4,7 @@
 \alias{print.xgb.cv.synchronous}
 \title{Print xgb.cv result}
 \usage{
-print.xgb.cv.synchronous(x, verbose = FALSE, ...)
+\method{print}{xgb.cv.synchronous}(x, verbose = FALSE, ...)
 }
 \arguments{
 \item{x}{an \code{xgb.cv.synchronous} object}
@@ -7,7 +7,7 @@
 \usage{
 setinfo(object, ...)

-\method{setinfo}{xgb.DMatrix}(object, name, info)
+\method{setinfo}{xgb.DMatrix}(object, name, info, ...)
 }
 \arguments{
 \item{object}{Object of class "xgb.DMatrix"}
@@ -48,7 +48,7 @@ would not be saved by \code{xgb.save} because an xgboost model is an external me
 and its serialization is handled extrnally.
 Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
 change the value of that parameter for a model.
-Use \code{\link{`xgb.parameters<-`}} to set or change model parameters.
+Use \code{\link{xgb.parameters<-}} to set or change model parameters.

 The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
 than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.
@@ -25,7 +25,7 @@ This is the function inspired from the paragraph 3.1 of the paper:
 \strong{Practical Lessons from Predicting Clicks on Ads at Facebook}

 \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
-Joaquin Quiñonero Candela)}
+Joaquin Quinonero Candela)}

 International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
@@ -33,7 +33,7 @@ International Workshop on Data Mining for Online Advertising (ADKDD) - August 24

 Extract explaining the method:

-"\emph{We found that boosted decision trees are a powerful and very
+"We found that boosted decision trees are a powerful and very
 convenient way to implement non-linear and tuple transformations
 of the kind we just described. We treat each individual
 tree as a categorical feature that takes as value the
@@ -54,7 +54,7 @@ We can understand boosted decision tree
 based transformation as a supervised feature encoding that
 converts a real-valued vector into a compact binary-valued
 vector. A traversal from root node to a leaf node represents
-a rule on certain features.}"
+a rule on certain features."
 }
 \examples{
 data(agaricus.train, package='xgboost')
@@ -241,7 +241,7 @@ Therefore, according to our findings, getting a placebo doesn't seem to help but
 All these things are nice, but it would be even better to plot the results.

 ```{r, fig.width=8, fig.height=5, fig.align='center'}
-xgb.plot.importance(importance_matrix = importanceRaw)
+xgb.plot.importance(importance_matrix = importance)
 ```

 Feature have automatically been divided in 2 clusters: the interesting features... and the others.
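The chunk now plots the processed `importance` table instead of the raw one. For context, a hedged sketch of how such a table is typically produced earlier in the vignette (the names `bst` and `sparse_matrix` are assumptions):

```r
# Assumed from earlier in the vignette: bst is the trained booster and
# sparse_matrix the one-hot encoded training matrix.
importance <- xgb.importance(feature_names = colnames(sparse_matrix),
                             model = bst)
xgb.plot.importance(importance_matrix = importance)
```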
@@ -164,7 +164,7 @@ dtest <- xgb.DMatrix(test$data, label = test$label)
 watchlist <- list(eval = dtest, train = dtrain)
 param <- list(max_depth = 2, eta = 1, silent = 1)

-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror)
+bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
 @

 The gradient and second order gradient is required for the output of customized
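`maximize = FALSE` is needed because with a custom evaluation function xgboost cannot infer whether larger metric values are better. A sketch of the objective/metric pair this chunk relies on, consistent with the surrounding vignette but not copied verbatim:

```r
# Custom logistic objective: gradient and hessian of the log loss.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))
  list(grad = preds - labels, hess = preds * (1 - preds))
}

# Custom metric: classification error; lower is better, hence
# maximize = FALSE in the xgb.train() call above.
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  list(metric = "error", value = mean(as.numeric(preds > 0) != labels))
}

bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
                 obj = logregobj, feval = evalerror, maximize = FALSE)
```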