[R-package] various fixes for R CMD check (#1328)

* [R] fix xgb.create.features

* [R] fixes for R CMD check
Vadim Khotilovich, 2016-07-04 12:40:35 -05:00, committed by Tianqi Chen
parent f8d23b97be
commit 11efa038bd
22 changed files with 49 additions and 39 deletions

View File

@@ -20,6 +20,7 @@ BugReports: https://github.com/dmlc/xgboost/issues
 VignetteBuilder: knitr
 Suggests:
     knitr,
+    rmarkdown,
     ggplot2 (>= 1.0.1),
     DiagrammeR (>= 0.8.1),
     Ckmeans.1d.dp (>= 3.3.1),

View File

@@ -7,6 +7,9 @@ S3method(dimnames,xgb.DMatrix)
 S3method(getinfo,xgb.DMatrix)
 S3method(predict,xgb.Booster)
 S3method(predict,xgb.Booster.handle)
+S3method(print,xgb.Booster)
+S3method(print,xgb.DMatrix)
+S3method(print,xgb.cv.synchronous)
 S3method(setinfo,xgb.DMatrix)
 S3method(slice,xgb.DMatrix)
 export("xgb.attr<-")
@@ -19,9 +22,6 @@ export(cb.print.evaluation)
 export(cb.reset.parameters)
 export(cb.save.model)
 export(getinfo)
-export(print.xgb.Booster)
-export(print.xgb.DMatrix)
-export(print.xgb.cv.synchronous)
 export(setinfo)
 export(slice)
 export(xgb.DMatrix)
@@ -55,10 +55,14 @@ importFrom(data.table,data.table)
 importFrom(data.table,rbindlist)
 importFrom(data.table,setnames)
 importFrom(magrittr,"%>%")
+importFrom(stats,predict)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
 importFrom(stringr,str_match)
 importFrom(stringr,str_replace)
 importFrom(stringr,str_replace_all)
 importFrom(stringr,str_split)
+importFrom(utils,object.size)
+importFrom(utils,str)
+importFrom(utils,tail)
 useDynLib(xgboost)
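
For context on this NAMESPACE change: R CMD check flags functions that look like S3 methods but are exported under their full names ("apparent S3 methods exported but not registered"). Registering them with S3method() lets the generic dispatch correctly without exposing the full name. A minimal sketch, assuming a fitted booster bst already exists:

```r
## Before this commit, print.xgb.Booster was export()-ed like an ordinary
## function. After S3method(print, xgb.Booster), it is found via dispatch:
print(bst)    # dispatches to the registered print.xgb.Booster method
bst           # console auto-printing goes through the same method

## The full name is no longer part of the exported API; it remains
## reachable only through the package namespace:
## xgboost:::print.xgb.Booster(bst)
```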

View File

@@ -178,7 +178,7 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) {
 #' @rdname predict.xgb.Booster
 #' @export
 predict.xgb.Booster <- function(object, newdata, missing = NA,
-                                outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE) {
+                                outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) {
   object <- xgb.Booster.check(object, saveraw = FALSE)
   if (class(newdata) != "xgb.DMatrix")
@@ -245,7 +245,7 @@ predict.xgb.Booster.handle <- function(object, ...) {
 #' and its serialization is handled extrnally.
 #' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
 #' change the value of that parameter for a model.
-#' Use \code{\link{`xgb.parameters<-`}} to set or change model parameters.
+#' Use \code{\link{xgb.parameters<-}} to set or change model parameters.
 #'
 #' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
 #' than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.
@@ -413,7 +413,8 @@ xgb.ntree <- function(bst) {
 #'
 #' print(bst)
 #' print(bst, verbose=TRUE)
 #'
+#' @method print xgb.Booster
 #' @export
 print.xgb.Booster <- function(x, verbose=FALSE, ...) {
   cat('##### xgb.Booster\n')
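
The added `...` in predict.xgb.Booster is what R CMD check's S3 generic/method consistency test demands: stats::predict is defined as function(object, ...), and a method must accept at least the arguments of its generic. A sketch of the rule with a hypothetical class:

```r
## stats::predict is the generic: function(object, ...).
## A method whose signature omits `...`, e.g.
##   predict.myclass <- function(object, newdata) NULL
## draws a WARNING under "checking S3 generic/method consistency".
## Including `...` makes the method compatible with the generic:
predict.myclass <- function(object, newdata, ...) {
  rep(0, NROW(newdata))  # hypothetical body, for illustration only
}
```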

View File

@@ -186,7 +186,7 @@ getinfo <- function(object, ...) UseMethod("getinfo")
 #' @rdname getinfo
 #' @export
-getinfo.xgb.DMatrix <- function(object, name) {
+getinfo.xgb.DMatrix <- function(object, name, ...) {
   if (typeof(name) != "character" ||
       length(name) != 1 ||
       !name %in% c('label', 'weight', 'base_margin', 'nrow')) {
@@ -211,7 +211,7 @@ getinfo.xgb.DMatrix <- function(object, name) {
 #' @param name the name of the field to get
 #' @param info the specific field of information to set
 #' @param ... other parameters
 #'
 #' @details
 #' The \code{name} field can be one of the following:
 #'
@@ -237,7 +237,7 @@ setinfo <- function(object, ...) UseMethod("setinfo")
 #' @rdname setinfo
 #' @export
-setinfo.xgb.DMatrix <- function(object, name, info) {
+setinfo.xgb.DMatrix <- function(object, name, info, ...) {
   if (name == "label") {
     if (length(info) != nrow(object))
       stop("The length of labels must equal to the number of rows in the input data")
@@ -341,6 +341,8 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
 #'
 #' dtrain
 #' print(dtrain, verbose=TRUE)
+#'
+#' @method print xgb.DMatrix
 #' @export
 print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
   cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ')
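
For reference, a short usage sketch of the getinfo/setinfo pair whose signatures are extended above; the field names follow the check visible in the diff:

```r
library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)

labels <- getinfo(dtrain, "label")               # read a field
setinfo(dtrain, "weight", rep(1, nrow(dtrain)))  # set per-row weights
getinfo(dtrain, "nrow")                          # one of the allowed field names
```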

View File

@@ -14,7 +14,7 @@
 #' \strong{Practical Lessons from Predicting Clicks on Ads at Facebook}
 #'
 #' \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
-#' Joaquin Quiñonero Candela)}
+#' Joaquin Quinonero Candela)}
 #'
 #' International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
 #'
@@ -22,7 +22,7 @@
 #'
 #' Extract explaining the method:
 #'
-#' "\emph{We found that boosted decision trees are a powerful and very
+#' "We found that boosted decision trees are a powerful and very
 #' convenient way to implement non-linear and tuple transformations
 #' of the kind we just described. We treat each individual
 #' tree as a categorical feature that takes as value the
@@ -43,7 +43,7 @@
 #' based transformation as a supervised feature encoding that
 #' converts a real-valued vector into a compact binary-valued
 #' vector. A traversal from root node to a leaf node represents
-#' a rule on certain features.}"
+#' a rule on certain features."
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
@@ -78,12 +78,7 @@
 #' @export
 xgb.create.features <- function(model, data, ...){
   check.deprecation(...)
-  pred_with_leaf = predict(model, data, predleaf = TRUE)
-  cols <- list()
-  for(i in 1:length(trees)){
-    # max is not the real max but it s not important for the purpose of adding features
-    leaf_id <- sort(unique(pred_with_leaf[,i]))
-    cols[[i]] <- factor(x = pred_with_leaf[,i], level = leaf_id)
-  }
-  cBind(data, sparse.model.matrix( ~ . -1, as.data.frame(cols)))
+  pred_with_leaf <- predict(model, data, predleaf = TRUE)
+  cols <- lapply(as.data.frame(pred_with_leaf), factor)
+  cBind(data, sparse.model.matrix( ~ . -1, cols))
 }
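
The rewritten body fixes a real bug: the old loop iterated over an undefined variable `trees`, so the function could not run at all. The replacement converts each column of the leaf-index matrix to a factor via lapply and one-hot encodes the result with sparse.model.matrix. A usage sketch consistent with the @examples in this file (the model and data names are illustrative):

```r
library(xgboost)
library(Matrix)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
bst <- xgb.train(list(max_depth = 2, eta = 1, objective = "binary:logistic"),
                 dtrain, nrounds = 2)

## Append the one-hot encoded leaf indices as extra sparse columns:
new_data <- xgb.create.features(model = bst, agaricus.train$data)
dim(new_data)  # same number of rows, more columns than the original matrix
```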

View File

@@ -171,7 +171,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
   # CV-predictions callback
   if (prediction &&
       !has.callbacks(callbacks, 'cb.cv.predict')) {
-    callbacks <- add.cb(callbacks, cb.cv.predict(save_model=FALSE))
+    callbacks <- add.cb(callbacks, cb.cv.predict(save_models=FALSE))
   }
   # Sort the callbacks into categories
   cb <- categorize.callbacks(callbacks)
@@ -253,6 +253,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
 #' print(cv, verbose=TRUE)
 #'
 #' @rdname print.xgb.cv
 #' @method print xgb.cv.synchronous
 #' @export
 print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) {
   cat('##### xgb.cv ', length(x$folds), '-folds\n', sep='')
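
The save_models fix above corrects a misspelled argument name (per this diff, cb.cv.predict takes save_models, not save_model). For context, a hedged sketch of the code path it sits on: with prediction = TRUE, xgb.cv attaches that callback and returns the out-of-fold predictions.

```r
library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)

## prediction = TRUE routes through the cb.cv.predict callback shown above
## and stores out-of-fold predictions in the result:
cv <- xgb.cv(params = list(objective = "binary:logistic", max_depth = 2, eta = 1),
             data = dtrain, nrounds = 3, nfold = 5, prediction = TRUE)
head(cv$pred)
```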

View File

@@ -103,4 +103,4 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
-globalVariables(c(".", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover"))
+globalVariables(c(".", ".N", "Gain", "Frequency", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover"))

View File

@@ -1,7 +1,8 @@
 #' Plot multiple graphs at the same time
 #'
 #' Plot multiple graph aligned by rows and columns.
 #'
+#' @param ... the plots
 #' @param cols number of columns
 #' @return NULL
 multiplot <- function(..., cols = 1) {
@@ -40,7 +41,7 @@ edge.parser <- function(element) {
 }
 #' Extract path from root to leaf from data.table
-#' @param dt.tree data.table containing the nodes and edges of the trees
+#' @param dt_tree data.table containing the nodes and edges of the trees
 get.paths.to.leaf <- function(dt_tree) {
   dt.not.leaf.edges <-
     dt_tree[Feature != "Leaf",.(ID, Yes, Tree)] %>% list(dt_tree[Feature != "Leaf",.(ID, No, Tree)]) %>% rbindlist(use.names = F)
@@ -149,6 +150,6 @@ xgb.plot.deepness <- function(model = NULL) {
 # They are mainly column names inferred by Data.table...
 globalVariables(
   c(
-    "Feature", "Count", "ggplot", "aes", "geom_bar", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "ID", "Yes", "No", "Tree"
+    ".N", "N", "size", "Feature", "Count", "ggplot", "aes", "geom_bar", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "ID", "Yes", "No", "Tree"
   )
 )

View File

@@ -103,6 +103,6 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
 globalVariables(
   c(
-    "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
+    ".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
   )
 )

View File

@@ -95,6 +95,8 @@ NULL
 #' @importFrom stringr str_replace
 #' @importFrom stringr str_replace_all
 #' @importFrom stringr str_split
+#' @importFrom utils object.size str tail
+#' @importFrom stats predict
 #'
 #' @import methods
 #' @useDynLib xgboost
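
These @importFrom tags respond to a check tightened around R 3.3: functions from default packages other than base (here utils and stats) must be imported explicitly, otherwise R CMD check reports them as undefined globals. A minimal sketch of the mechanism, in the same package-doc idiom as the hunk above:

```r
## roxygen2 turns these tags into the NAMESPACE directives shown earlier in
## this commit, e.g. importFrom(utils,object.size) and importFrom(stats,predict).
## Without importFrom(stats, predict), package code calling predict() draws
## "no visible global function definition for 'predict'" from R CMD check.
#' @importFrom utils object.size str tail
#' @importFrom stats predict
NULL
```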

View File

@@ -7,7 +7,7 @@
 get.paths.to.leaf(dt_tree)
 }
 \arguments{
-\item{dt.tree}{data.table containing the nodes and edges of the trees}
+\item{dt_tree}{data.table containing the nodes and edges of the trees}
 }
 \description{
 Extract path from root to leaf from data.table

View File

@@ -7,7 +7,7 @@
 \usage{
 getinfo(object, ...)
-\method{getinfo}{xgb.DMatrix}(object, name)
+\method{getinfo}{xgb.DMatrix}(object, name, ...)
 }
 \arguments{
 \item{object}{Object of class \code{xgb.DMatrix}}

View File

@@ -7,6 +7,8 @@
 multiplot(..., cols = 1)
 }
 \arguments{
+\item{...}{the plots}
+
 \item{cols}{number of columns}
 }
 \description{

View File

@@ -7,7 +7,7 @@
 \usage{
 \method{predict}{xgb.Booster}(object, newdata, missing = NA,
   outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE,
-  reshape = FALSE)
+  reshape = FALSE, ...)
 \method{predict}{xgb.Booster.handle}(object, ...)
 }

View File

@@ -4,7 +4,7 @@
 \alias{print.xgb.Booster}
 \title{Print xgb.Booster}
 \usage{
-print.xgb.Booster(x, verbose = FALSE, ...)
+\method{print}{xgb.Booster}(x, verbose = FALSE, ...)
 }
 \arguments{
 \item{x}{an xgb.Booster object}
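
These .Rd changes are regenerated rather than hand-written: once a function carries the @method tag, roxygen2 emits the \method{print}{xgb.Booster}(x, ...) usage markup, which the manual renders as a call through the generic. A quick sketch of what that means for user code (bst assumed to be a fitted xgb.Booster):

```r
## The documented usage is the generic call, matching real-world use:
print(bst, verbose = TRUE)  # dispatches to print.xgb.Booster
```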

View File

@@ -4,7 +4,7 @@
 \alias{print.xgb.DMatrix}
 \title{Print xgb.DMatrix}
 \usage{
-print.xgb.DMatrix(x, verbose = FALSE, ...)
+\method{print}{xgb.DMatrix}(x, verbose = FALSE, ...)
 }
 \arguments{
 \item{x}{an xgb.DMatrix object}
@@ -24,5 +24,6 @@ dtrain <- xgb.DMatrix(train$data, label=train$label)
 dtrain
 print(dtrain, verbose=TRUE)
 }

View File

@@ -4,7 +4,7 @@
 \alias{print.xgb.cv.synchronous}
 \title{Print xgb.cv result}
 \usage{
-print.xgb.cv.synchronous(x, verbose = FALSE, ...)
+\method{print}{xgb.cv.synchronous}(x, verbose = FALSE, ...)
 }
 \arguments{
 \item{x}{an \code{xgb.cv.synchronous} object}

View File

@@ -7,7 +7,7 @@
 \usage{
 setinfo(object, ...)
-\method{setinfo}{xgb.DMatrix}(object, name, info)
+\method{setinfo}{xgb.DMatrix}(object, name, info, ...)
 }
 \arguments{
 \item{object}{Object of class "xgb.DMatrix"}

View File

@@ -48,7 +48,7 @@ would not be saved by \code{xgb.save} because an xgboost model is an external me
 and its serialization is handled extrnally.
 Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
 change the value of that parameter for a model.
-Use \code{\link{`xgb.parameters<-`}} to set or change model parameters.
+Use \code{\link{xgb.parameters<-}} to set or change model parameters.

 The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
 than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.

View File

@@ -25,7 +25,7 @@ This is the function inspired from the paragraph 3.1 of the paper:
 \strong{Practical Lessons from Predicting Clicks on Ads at Facebook}

 \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
-Joaquin Quiñonero Candela)}
+Joaquin Quinonero Candela)}

 International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
@@ -33,7 +33,7 @@ International Workshop on Data Mining for Online Advertising (ADKDD) - August 24
 Extract explaining the method:

-"\emph{We found that boosted decision trees are a powerful and very
+"We found that boosted decision trees are a powerful and very
 convenient way to implement non-linear and tuple transformations
 of the kind we just described. We treat each individual
 tree as a categorical feature that takes as value the
@@ -54,7 +54,7 @@ We can understand boosted decision tree
 based transformation as a supervised feature encoding that
 converts a real-valued vector into a compact binary-valued
 vector. A traversal from root node to a leaf node represents
-a rule on certain features.}"
+a rule on certain features."
 }
 \examples{
 data(agaricus.train, package='xgboost')

View File

@@ -241,7 +241,7 @@ Therefore, according to our findings, getting a placebo doesn't seem to help but
 All these things are nice, but it would be even better to plot the results.

 ```{r, fig.width=8, fig.height=5, fig.align='center'}
-xgb.plot.importance(importance_matrix = importanceRaw)
+xgb.plot.importance(importance_matrix = importance)
 ```

 Feature have automatically been divided in 2 clusters: the interesting features... and the others.
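
The rename from importanceRaw to importance points the plotting chunk at the right intermediate: earlier in the same vignette, the raw per-split table is summarized into an importance matrix before plotting. A hedged sketch of that preceding step (the names sparse_matrix and bst are assumptions from surrounding context, not quoted from the vignette):

```r
## Build the summarized importance matrix that the chunk above plots:
importance <- xgb.importance(feature_names = colnames(sparse_matrix), model = bst)
xgb.plot.importance(importance_matrix = importance)
```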

View File

@@ -164,7 +164,7 @@ dtest <- xgb.DMatrix(test$data, label = test$label)
 watchlist <- list(eval = dtest, train = dtrain)
 param <- list(max_depth = 2, eta = 1, silent = 1)
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror)
+bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
@@ ... @@
 The gradient and second order gradient is required for the output of customized
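
Passing maximize = FALSE matters with a custom evaluation metric: xgboost cannot infer the optimization direction of a user-supplied function, and evalerror returns an error rate, which should be minimized. For reference, the customized objective/metric pair this document refers to looks like the following, reconstructed from the standard xgboost custom-objective demo; treat it as a sketch rather than the vignette's exact text:

```r
## Custom objective: log-loss gradient and hessian for binary classification.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))   # raw margin -> probability
  grad <- preds - labels           # first-order gradient
  hess <- preds * (1 - preds)      # second-order gradient
  list(grad = grad, hess = hess)
}

## Custom evaluation: classification error rate. Lower is better, hence
## maximize = FALSE in the xgb.train call above.
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  err <- mean(as.numeric(preds > 0) != labels)
  list(metric = "error", value = err)
}
```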