From 11efa038bd36c71b0b7f0d7d4c9e12a9b1be9313 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Mon, 4 Jul 2016 12:40:35 -0500 Subject: [PATCH] [R-package] various fixes for R CMD check (#1328) * [R] fix xgb.create.features * [R] fixes for R CMD check --- R-package/DESCRIPTION | 1 + R-package/NAMESPACE | 10 +++++++--- R-package/R/xgb.Booster.R | 7 ++++--- R-package/R/xgb.DMatrix.R | 8 +++++--- R-package/R/xgb.create.features.R | 17 ++++++----------- R-package/R/xgb.cv.R | 3 ++- R-package/R/xgb.importance.R | 2 +- R-package/R/xgb.plot.deepness.R | 7 ++++--- R-package/R/xgb.plot.multi.trees.R | 2 +- R-package/R/xgboost.R | 2 ++ R-package/man/get.paths.to.leaf.Rd | 2 +- R-package/man/getinfo.Rd | 2 +- R-package/man/multiplot.Rd | 2 ++ R-package/man/predict.xgb.Booster.Rd | 2 +- R-package/man/print.xgb.Booster.Rd | 2 +- R-package/man/print.xgb.DMatrix.Rd | 3 ++- R-package/man/print.xgb.cv.Rd | 2 +- R-package/man/setinfo.Rd | 2 +- R-package/man/xgb.attr.Rd | 2 +- R-package/man/xgb.create.features.Rd | 6 +++--- R-package/vignettes/discoverYourData.Rmd | 2 +- R-package/vignettes/xgboost.Rnw | 2 +- 22 files changed, 49 insertions(+), 39 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 951724aa5..b6881c266 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -20,6 +20,7 @@ BugReports: https://github.com/dmlc/xgboost/issues VignetteBuilder: knitr Suggests: knitr, + rmarkdown, ggplot2 (>= 1.0.1), DiagrammeR (>= 0.8.1), Ckmeans.1d.dp (>= 3.3.1), diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 44f1d5c3c..b144d617a 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -7,6 +7,9 @@ S3method(dimnames,xgb.DMatrix) S3method(getinfo,xgb.DMatrix) S3method(predict,xgb.Booster) S3method(predict,xgb.Booster.handle) +S3method(print,xgb.Booster) +S3method(print,xgb.DMatrix) +S3method(print,xgb.cv.synchronous) S3method(setinfo,xgb.DMatrix) S3method(slice,xgb.DMatrix) export("xgb.attr<-") @@ -19,9 +22,6 @@ export(cb.print.evaluation) export(cb.reset.parameters) export(cb.save.model) export(getinfo) -export(print.xgb.Booster) -export(print.xgb.DMatrix) -export(print.xgb.cv.synchronous) export(setinfo) export(slice) export(xgb.DMatrix) @@ -55,10 +55,14 @@ importFrom(data.table,data.table) importFrom(data.table,rbindlist) importFrom(data.table,setnames) importFrom(magrittr,"%>%") +importFrom(stats,predict) importFrom(stringr,str_detect) importFrom(stringr,str_extract) importFrom(stringr,str_match) importFrom(stringr,str_replace) importFrom(stringr,str_replace_all) importFrom(stringr,str_split) +importFrom(utils,object.size) +importFrom(utils,str) +importFrom(utils,tail) useDynLib(xgboost) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 34a88dca6..6c336f238 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -178,7 +178,7 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) { #' @rdname predict.xgb.Booster #' @export predict.xgb.Booster <- function(object, newdata, missing = NA, - outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE) { + outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) { object <- xgb.Booster.check(object, saveraw = FALSE) if (class(newdata) != "xgb.DMatrix") @@ -245,7 +245,7 @@ predict.xgb.Booster.handle <- function(object, ...) { #' and its serialization is handled extrnally. #' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't #' change the value of that parameter for a model. -#' Use \code{\link{`xgb.parameters<-`}} to set or change model parameters. +#' Use \code{\link{xgb.parameters<-}} to set or change model parameters. #' #' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle} #' than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied. @@ -413,7 +413,8 @@ xgb.ntree <- function(bst) { #' #' print(bst) #' print(bst, verbose=TRUE) -#' +#' +#' @method print xgb.Booster #' @export print.xgb.Booster <- function(x, verbose=FALSE, ...) { cat('##### xgb.Booster\n') diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index 36f7bfd1a..f6eeb041d 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -186,7 +186,7 @@ getinfo <- function(object, ...) UseMethod("getinfo") #' @rdname getinfo #' @export -getinfo.xgb.DMatrix <- function(object, name) { +getinfo.xgb.DMatrix <- function(object, name, ...) { if (typeof(name) != "character" || length(name) != 1 || !name %in% c('label', 'weight', 'base_margin', 'nrow')) { @@ -211,7 +211,7 @@ getinfo.xgb.DMatrix <- function(object, name) { #' @param name the name of the field to get #' @param info the specific field of information to set #' @param ... other parameters -#' +#' #' @details #' The \code{name} field can be one of the following: #' @@ -237,7 +237,7 @@ setinfo <- function(object, ...) UseMethod("setinfo") #' @rdname setinfo #' @export -setinfo.xgb.DMatrix <- function(object, name, info) { +setinfo.xgb.DMatrix <- function(object, name, info, ...) { if (name == "label") { if (length(info) != nrow(object)) stop("The length of labels must equal to the number of rows in the input data") @@ -341,6 +341,8 @@ slice.xgb.DMatrix <- function(object, idxset, ...) { #' #' dtrain #' print(dtrain, verbose=TRUE) +#' +#' @method print xgb.DMatrix #' @export print.xgb.DMatrix <- function(x, verbose=FALSE, ...) { cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ') diff --git a/R-package/R/xgb.create.features.R b/R-package/R/xgb.create.features.R index 1e5ae75a1..1473fe3b4 100644 --- a/R-package/R/xgb.create.features.R +++ b/R-package/R/xgb.create.features.R @@ -14,7 +14,7 @@ #' \strong{Practical Lessons from Predicting Clicks on Ads at Facebook} #' #' \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers, -#' Joaquin QuiƱonero Candela)} +#' Joaquin Quinonero Candela)} #' #' International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014 #' @@ -22,7 +22,7 @@ #' #' Extract explaining the method: #' -#' "\emph{We found that boosted decision trees are a powerful and very +#' "We found that boosted decision trees are a powerful and very #' convenient way to implement non-linear and tuple transformations #' of the kind we just described. We treat each individual #' tree as a categorical feature that takes as value the @@ -43,7 +43,7 @@ #' based transformation as a supervised feature encoding that #' converts a real-valued vector into a compact binary-valued #' vector. A traversal from root node to a leaf node represents -#' a rule on certain features.}" +#' a rule on certain features." #' #' @examples #' data(agaricus.train, package='xgboost') @@ -78,12 +78,7 @@ #' @export xgb.create.features <- function(model, data, ...){ check.deprecation(...) - pred_with_leaf = predict(model, data, predleaf = TRUE) - cols <- list() - for(i in 1:length(trees)){ - # max is not the real max but it s not important for the purpose of adding features - leaf_id <- sort(unique(pred_with_leaf[,i])) - cols[[i]] <- factor(x = pred_with_leaf[,i], level = leaf_id) - } - cBind(data, sparse.model.matrix( ~ . -1, as.data.frame(cols))) + pred_with_leaf <- predict(model, data, predleaf = TRUE) + cols <- lapply(as.data.frame(pred_with_leaf), factor) + cBind(data, sparse.model.matrix( ~ . -1, cols)) } diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index e3a84cec0..51b54b917 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -171,7 +171,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = # CV-predictions callback if (prediction && !has.callbacks(callbacks, 'cb.cv.predict')) { - callbacks <- add.cb(callbacks, cb.cv.predict(save_model=FALSE)) + callbacks <- add.cb(callbacks, cb.cv.predict(save_models=FALSE)) } # Sort the callbacks into categories cb <- categorize.callbacks(callbacks) @@ -253,6 +253,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = #' print(cv, verbose=TRUE) #' #' @rdname print.xgb.cv +#' @method print xgb.cv.synchronous #' @export print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) { cat('##### xgb.cv ', length(x$folds), '-folds\n', sep='') diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R index 0219d562c..0877e9a49 100644 --- a/R-package/R/xgb.importance.R +++ b/R-package/R/xgb.importance.R @@ -103,4 +103,4 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe # Avoid error messages during CRAN check. # The reason is that these variables are never declared # They are mainly column names inferred by Data.table... -globalVariables(c(".", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover")) +globalVariables(c(".", ".N", "Gain", "Frequency", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover")) diff --git a/R-package/R/xgb.plot.deepness.R b/R-package/R/xgb.plot.deepness.R index 2c4d71a7d..4d9848fc3 100644 --- a/R-package/R/xgb.plot.deepness.R +++ b/R-package/R/xgb.plot.deepness.R @@ -1,7 +1,8 @@ #' Plot multiple graphs at the same time #' #' Plot multiple graph aligned by rows and columns. -#' +#' +#' @param ... the plots #' @param cols number of columns #' @return NULL multiplot <- function(..., cols = 1) { @@ -40,7 +41,7 @@ edge.parser <- function(element) { } #' Extract path from root to leaf from data.table -#' @param dt.tree data.table containing the nodes and edges of the trees +#' @param dt_tree data.table containing the nodes and edges of the trees get.paths.to.leaf <- function(dt_tree) { dt.not.leaf.edges <- dt_tree[Feature != "Leaf",.(ID, Yes, Tree)] %>% list(dt_tree[Feature != "Leaf",.(ID, No, Tree)]) %>% rbindlist(use.names = F) @@ -149,6 +150,6 @@ xgb.plot.deepness <- function(model = NULL) { # They are mainly column names inferred by Data.table... globalVariables( c( - "Feature", "Count", "ggplot", "aes", "geom_bar", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "ID", "Yes", "No", "Tree" + ".N", "N", "size", "Feature", "Count", "ggplot", "aes", "geom_bar", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "ID", "Yes", "No", "Tree" ) ) diff --git a/R-package/R/xgb.plot.multi.trees.R b/R-package/R/xgb.plot.multi.trees.R index 0b7c9320e..acc303ee2 100644 --- a/R-package/R/xgb.plot.multi.trees.R +++ b/R-package/R/xgb.plot.multi.trees.R @@ -103,6 +103,6 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, globalVariables( c( - "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position" + ".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position" ) ) diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index e70be0e5c..ed6fa9a81 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -95,6 +95,8 @@ NULL #' @importFrom stringr str_replace #' @importFrom stringr str_replace_all #' @importFrom stringr str_split +#' @importFrom utils object.size str tail +#' @importFrom stats predict #' #' @import methods #' @useDynLib xgboost diff --git a/R-package/man/get.paths.to.leaf.Rd b/R-package/man/get.paths.to.leaf.Rd index 8b19ae6d8..28870ff6a 100644 --- a/R-package/man/get.paths.to.leaf.Rd +++ b/R-package/man/get.paths.to.leaf.Rd @@ -7,7 +7,7 @@ get.paths.to.leaf(dt_tree) } \arguments{ -\item{dt.tree}{data.table containing the nodes and edges of the trees} +\item{dt_tree}{data.table containing the nodes and edges of the trees} } \description{ Extract path from root to leaf from data.table diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 108d7edf2..16fbe8a79 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -7,7 +7,7 @@ \usage{ getinfo(object, ...) -\method{getinfo}{xgb.DMatrix}(object, name) +\method{getinfo}{xgb.DMatrix}(object, name, ...) } \arguments{ \item{object}{Object of class \code{xgb.DMatrix}} diff --git a/R-package/man/multiplot.Rd b/R-package/man/multiplot.Rd index a2fef7d99..edb6f87f1 100644 --- a/R-package/man/multiplot.Rd +++ b/R-package/man/multiplot.Rd @@ -7,6 +7,8 @@ multiplot(..., cols = 1) } \arguments{ +\item{...}{the plots} + \item{cols}{number of columns} } \description{ diff --git a/R-package/man/predict.xgb.Booster.Rd b/R-package/man/predict.xgb.Booster.Rd index 2dc537112..a07d8f352 100644 --- a/R-package/man/predict.xgb.Booster.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -7,7 +7,7 @@ \usage{ \method{predict}{xgb.Booster}(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, - reshape = FALSE) + reshape = FALSE, ...) \method{predict}{xgb.Booster.handle}(object, ...) } diff --git a/R-package/man/print.xgb.Booster.Rd b/R-package/man/print.xgb.Booster.Rd index 7f13c328c..bec142f8b 100644 --- a/R-package/man/print.xgb.Booster.Rd +++ b/R-package/man/print.xgb.Booster.Rd @@ -4,7 +4,7 @@ \alias{print.xgb.Booster} \title{Print xgb.Booster} \usage{ -print.xgb.Booster(x, verbose = FALSE, ...) +\method{print}{xgb.Booster}(x, verbose = FALSE, ...) } \arguments{ \item{x}{an xgb.Booster object} diff --git a/R-package/man/print.xgb.DMatrix.Rd b/R-package/man/print.xgb.DMatrix.Rd index 95837b9f4..0b677c30c 100644 --- a/R-package/man/print.xgb.DMatrix.Rd +++ b/R-package/man/print.xgb.DMatrix.Rd @@ -4,7 +4,7 @@ \alias{print.xgb.DMatrix} \title{Print xgb.DMatrix} \usage{ -print.xgb.DMatrix(x, verbose = FALSE, ...) +\method{print}{xgb.DMatrix}(x, verbose = FALSE, ...) } \arguments{ \item{x}{an xgb.DMatrix object} @@ -24,5 +24,6 @@ dtrain <- xgb.DMatrix(train$data, label=train$label) dtrain print(dtrain, verbose=TRUE) + } diff --git a/R-package/man/print.xgb.cv.Rd b/R-package/man/print.xgb.cv.Rd index cfe8878c6..731cc91ce 100644 --- a/R-package/man/print.xgb.cv.Rd +++ b/R-package/man/print.xgb.cv.Rd @@ -4,7 +4,7 @@ \alias{print.xgb.cv.synchronous} \title{Print xgb.cv result} \usage{ -print.xgb.cv.synchronous(x, verbose = FALSE, ...) +\method{print}{xgb.cv.synchronous}(x, verbose = FALSE, ...) } \arguments{ \item{x}{an \code{xgb.cv.synchronous} object} diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd index b182a9042..92c2e3294 100644 --- a/R-package/man/setinfo.Rd +++ b/R-package/man/setinfo.Rd @@ -7,7 +7,7 @@ \usage{ setinfo(object, ...) -\method{setinfo}{xgb.DMatrix}(object, name, info) +\method{setinfo}{xgb.DMatrix}(object, name, info, ...) } \arguments{ \item{object}{Object of class "xgb.DMatrix"} diff --git a/R-package/man/xgb.attr.Rd b/R-package/man/xgb.attr.Rd index e8992e714..79ffcc326 100644 --- a/R-package/man/xgb.attr.Rd +++ b/R-package/man/xgb.attr.Rd @@ -48,7 +48,7 @@ would not be saved by \code{xgb.save} because an xgboost model is an external me and its serialization is handled extrnally. Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't change the value of that parameter for a model. -Use \code{\link{`xgb.parameters<-`}} to set or change model parameters. +Use \code{\link{xgb.parameters<-}} to set or change model parameters. The attribute setters would usually work more efficiently for \code{xgb.Booster.handle} than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied. diff --git a/R-package/man/xgb.create.features.Rd b/R-package/man/xgb.create.features.Rd index 4d1b8a152..d9b9ec824 100644 --- a/R-package/man/xgb.create.features.Rd +++ b/R-package/man/xgb.create.features.Rd @@ -25,7 +25,7 @@ This is the function inspired from the paragraph 3.1 of the paper: \strong{Practical Lessons from Predicting Clicks on Ads at Facebook} \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers, -Joaquin QuiƱonero Candela)} +Joaquin Quinonero Candela)} International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014 @@ -33,7 +33,7 @@ International Workshop on Data Mining for Online Advertising (ADKDD) - August 24 Extract explaining the method: -"\emph{We found that boosted decision trees are a powerful and very +"We found that boosted decision trees are a powerful and very convenient way to implement non-linear and tuple transformations of the kind we just described. We treat each individual tree as a categorical feature that takes as value the @@ -54,7 +54,7 @@ We can understand boosted decision tree based transformation as a supervised feature encoding that converts a real-valued vector into a compact binary-valued vector. A traversal from root node to a leaf node represents -a rule on certain features.}" +a rule on certain features." } \examples{ data(agaricus.train, package='xgboost') diff --git a/R-package/vignettes/discoverYourData.Rmd b/R-package/vignettes/discoverYourData.Rmd index 25fff9d3e..2ee4ed90d 100644 --- a/R-package/vignettes/discoverYourData.Rmd +++ b/R-package/vignettes/discoverYourData.Rmd @@ -241,7 +241,7 @@ Therefore, according to our findings, getting a placebo doesn't seem to help but All these things are nice, but it would be even better to plot the results. ```{r, fig.width=8, fig.height=5, fig.align='center'} -xgb.plot.importance(importance_matrix = importanceRaw) +xgb.plot.importance(importance_matrix = importance) ``` Feature have automatically been divided in 2 clusters: the interesting features... and the others. diff --git a/R-package/vignettes/xgboost.Rnw b/R-package/vignettes/xgboost.Rnw index d6d6361b1..492718a68 100644 --- a/R-package/vignettes/xgboost.Rnw +++ b/R-package/vignettes/xgboost.Rnw @@ -164,7 +164,7 @@ dtest <- xgb.DMatrix(test$data, label = test$label) watchlist <- list(eval = dtest, train = dtrain) param <- list(max_depth = 2, eta = 1, silent = 1) -bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror) +bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE) @ The gradient and second order gradient is required for the output of customized