[R-package] various fixes for R CMD check (#1328)

* [R] fix xgb.create.features

* [R] fixes for R CMD check
This commit is contained in:
Vadim Khotilovich 2016-07-04 12:40:35 -05:00 committed by Tianqi Chen
parent f8d23b97be
commit 11efa038bd
22 changed files with 49 additions and 39 deletions

View File

@ -20,6 +20,7 @@ BugReports: https://github.com/dmlc/xgboost/issues
VignetteBuilder: knitr
Suggests:
knitr,
rmarkdown,
ggplot2 (>= 1.0.1),
DiagrammeR (>= 0.8.1),
Ckmeans.1d.dp (>= 3.3.1),

View File

@ -7,6 +7,9 @@ S3method(dimnames,xgb.DMatrix)
S3method(getinfo,xgb.DMatrix)
S3method(predict,xgb.Booster)
S3method(predict,xgb.Booster.handle)
S3method(print,xgb.Booster)
S3method(print,xgb.DMatrix)
S3method(print,xgb.cv.synchronous)
S3method(setinfo,xgb.DMatrix)
S3method(slice,xgb.DMatrix)
export("xgb.attr<-")
@ -19,9 +22,6 @@ export(cb.print.evaluation)
export(cb.reset.parameters)
export(cb.save.model)
export(getinfo)
export(print.xgb.Booster)
export(print.xgb.DMatrix)
export(print.xgb.cv.synchronous)
export(setinfo)
export(slice)
export(xgb.DMatrix)
@ -55,10 +55,14 @@ importFrom(data.table,data.table)
importFrom(data.table,rbindlist)
importFrom(data.table,setnames)
importFrom(magrittr,"%>%")
importFrom(stats,predict)
importFrom(stringr,str_detect)
importFrom(stringr,str_extract)
importFrom(stringr,str_match)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_split)
importFrom(utils,object.size)
importFrom(utils,str)
importFrom(utils,tail)
useDynLib(xgboost)

View File

@ -178,7 +178,7 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) {
#' @rdname predict.xgb.Booster
#' @export
predict.xgb.Booster <- function(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE) {
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) {
object <- xgb.Booster.check(object, saveraw = FALSE)
if (class(newdata) != "xgb.DMatrix")
@ -245,7 +245,7 @@ predict.xgb.Booster.handle <- function(object, ...) {
#' and its serialization is handled externally.
#' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
#' change the value of that parameter for a model.
#' Use \code{\link{`xgb.parameters<-`}} to set or change model parameters.
#' Use \code{\link{xgb.parameters<-}} to set or change model parameters.
#'
#' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
#' than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.
@ -414,6 +414,7 @@ xgb.ntree <- function(bst) {
#' print(bst)
#' print(bst, verbose=TRUE)
#'
#' @method print xgb.Booster
#' @export
print.xgb.Booster <- function(x, verbose=FALSE, ...) {
cat('##### xgb.Booster\n')

View File

@ -186,7 +186,7 @@ getinfo <- function(object, ...) UseMethod("getinfo")
#' @rdname getinfo
#' @export
getinfo.xgb.DMatrix <- function(object, name) {
getinfo.xgb.DMatrix <- function(object, name, ...) {
if (typeof(name) != "character" ||
length(name) != 1 ||
!name %in% c('label', 'weight', 'base_margin', 'nrow')) {
@ -237,7 +237,7 @@ setinfo <- function(object, ...) UseMethod("setinfo")
#' @rdname setinfo
#' @export
setinfo.xgb.DMatrix <- function(object, name, info) {
setinfo.xgb.DMatrix <- function(object, name, info, ...) {
if (name == "label") {
if (length(info) != nrow(object))
stop("The length of labels must equal to the number of rows in the input data")
@ -341,6 +341,8 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
#'
#' dtrain
#' print(dtrain, verbose=TRUE)
#'
#' @method print xgb.DMatrix
#' @export
print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ')

View File

@ -14,7 +14,7 @@
#' \strong{Practical Lessons from Predicting Clicks on Ads at Facebook}
#'
#' \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yanxin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
#' Joaquin Quiñonero Candela)}
#' Joaquin Quinonero Candela)}
#'
#' International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
#'
@ -22,7 +22,7 @@
#'
#' Extract explaining the method:
#'
#' "\emph{We found that boosted decision trees are a powerful and very
#' "We found that boosted decision trees are a powerful and very
#' convenient way to implement non-linear and tuple transformations
#' of the kind we just described. We treat each individual
#' tree as a categorical feature that takes as value the
@ -43,7 +43,7 @@
#' based transformation as a supervised feature encoding that
#' converts a real-valued vector into a compact binary-valued
#' vector. A traversal from root node to a leaf node represents
#' a rule on certain features.}"
#' a rule on certain features."
#'
#' @examples
#' data(agaricus.train, package='xgboost')
@ -78,12 +78,7 @@
#' @export
xgb.create.features <- function(model, data, ...){
check.deprecation(...)
pred_with_leaf = predict(model, data, predleaf = TRUE)
cols <- list()
for(i in 1:length(trees)){
# max is not the real max but it s not important for the purpose of adding features
leaf_id <- sort(unique(pred_with_leaf[,i]))
cols[[i]] <- factor(x = pred_with_leaf[,i], level = leaf_id)
}
cBind(data, sparse.model.matrix( ~ . -1, as.data.frame(cols)))
pred_with_leaf <- predict(model, data, predleaf = TRUE)
cols <- lapply(as.data.frame(pred_with_leaf), factor)
cBind(data, sparse.model.matrix( ~ . -1, cols))
}

View File

@ -171,7 +171,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
# CV-predictions callback
if (prediction &&
!has.callbacks(callbacks, 'cb.cv.predict')) {
callbacks <- add.cb(callbacks, cb.cv.predict(save_model=FALSE))
callbacks <- add.cb(callbacks, cb.cv.predict(save_models=FALSE))
}
# Sort the callbacks into categories
cb <- categorize.callbacks(callbacks)
@ -253,6 +253,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
#' print(cv, verbose=TRUE)
#'
#' @rdname print.xgb.cv
#' @method print xgb.cv.synchronous
#' @export
print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) {
cat('##### xgb.cv ', length(x$folds), '-folds\n', sep='')

View File

@ -103,4 +103,4 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe
# Avoid error messages during CRAN check.
# The reason is that these variables are never declared
# They are mainly column names inferred by Data.table...
globalVariables(c(".", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover"))
globalVariables(c(".", ".N", "Gain", "Frequency", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover"))

View File

@ -2,6 +2,7 @@
#'
#' Plot multiple graph aligned by rows and columns.
#'
#' @param ... the plots
#' @param cols number of columns
#' @return NULL
multiplot <- function(..., cols = 1) {
@ -40,7 +41,7 @@ edge.parser <- function(element) {
}
#' Extract path from root to leaf from data.table
#' @param dt.tree data.table containing the nodes and edges of the trees
#' @param dt_tree data.table containing the nodes and edges of the trees
get.paths.to.leaf <- function(dt_tree) {
dt.not.leaf.edges <-
dt_tree[Feature != "Leaf",.(ID, Yes, Tree)] %>% list(dt_tree[Feature != "Leaf",.(ID, No, Tree)]) %>% rbindlist(use.names = F)
@ -149,6 +150,6 @@ xgb.plot.deepness <- function(model = NULL) {
# They are mainly column names inferred by Data.table...
globalVariables(
c(
"Feature", "Count", "ggplot", "aes", "geom_bar", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "ID", "Yes", "No", "Tree"
".N", "N", "size", "Feature", "Count", "ggplot", "aes", "geom_bar", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "ID", "Yes", "No", "Tree"
)
)

View File

@ -103,6 +103,6 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,
globalVariables(
c(
"Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
)
)

View File

@ -95,6 +95,8 @@ NULL
#' @importFrom stringr str_replace
#' @importFrom stringr str_replace_all
#' @importFrom stringr str_split
#' @importFrom utils object.size str tail
#' @importFrom stats predict
#'
#' @import methods
#' @useDynLib xgboost

View File

@ -7,7 +7,7 @@
get.paths.to.leaf(dt_tree)
}
\arguments{
\item{dt.tree}{data.table containing the nodes and edges of the trees}
\item{dt_tree}{data.table containing the nodes and edges of the trees}
}
\description{
Extract path from root to leaf from data.table

View File

@ -7,7 +7,7 @@
\usage{
getinfo(object, ...)
\method{getinfo}{xgb.DMatrix}(object, name)
\method{getinfo}{xgb.DMatrix}(object, name, ...)
}
\arguments{
\item{object}{Object of class \code{xgb.DMatrix}}

View File

@ -7,6 +7,8 @@
multiplot(..., cols = 1)
}
\arguments{
\item{...}{the plots}
\item{cols}{number of columns}
}
\description{

View File

@ -7,7 +7,7 @@
\usage{
\method{predict}{xgb.Booster}(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE,
reshape = FALSE)
reshape = FALSE, ...)
\method{predict}{xgb.Booster.handle}(object, ...)
}

View File

@ -4,7 +4,7 @@
\alias{print.xgb.Booster}
\title{Print xgb.Booster}
\usage{
print.xgb.Booster(x, verbose = FALSE, ...)
\method{print}{xgb.Booster}(x, verbose = FALSE, ...)
}
\arguments{
\item{x}{an xgb.Booster object}

View File

@ -4,7 +4,7 @@
\alias{print.xgb.DMatrix}
\title{Print xgb.DMatrix}
\usage{
print.xgb.DMatrix(x, verbose = FALSE, ...)
\method{print}{xgb.DMatrix}(x, verbose = FALSE, ...)
}
\arguments{
\item{x}{an xgb.DMatrix object}
@ -24,5 +24,6 @@ dtrain <- xgb.DMatrix(train$data, label=train$label)
dtrain
print(dtrain, verbose=TRUE)
}

View File

@ -4,7 +4,7 @@
\alias{print.xgb.cv.synchronous}
\title{Print xgb.cv result}
\usage{
print.xgb.cv.synchronous(x, verbose = FALSE, ...)
\method{print}{xgb.cv.synchronous}(x, verbose = FALSE, ...)
}
\arguments{
\item{x}{an \code{xgb.cv.synchronous} object}

View File

@ -7,7 +7,7 @@
\usage{
setinfo(object, ...)
\method{setinfo}{xgb.DMatrix}(object, name, info)
\method{setinfo}{xgb.DMatrix}(object, name, info, ...)
}
\arguments{
\item{object}{Object of class "xgb.DMatrix"}

View File

@ -48,7 +48,7 @@ would not be saved by \code{xgb.save} because an xgboost model is an external me
and its serialization is handled externally.
Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
change the value of that parameter for a model.
Use \code{\link{`xgb.parameters<-`}} to set or change model parameters.
Use \code{\link{xgb.parameters<-}} to set or change model parameters.
The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.

View File

@ -25,7 +25,7 @@ This is the function inspired from the paragraph 3.1 of the paper:
\strong{Practical Lessons from Predicting Clicks on Ads at Facebook}
\emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yanxin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
Joaquin Quiñonero Candela)}
Joaquin Quinonero Candela)}
International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
@ -33,7 +33,7 @@ International Workshop on Data Mining for Online Advertising (ADKDD) - August 24
Extract explaining the method:
"\emph{We found that boosted decision trees are a powerful and very
"We found that boosted decision trees are a powerful and very
convenient way to implement non-linear and tuple transformations
of the kind we just described. We treat each individual
tree as a categorical feature that takes as value the
@ -54,7 +54,7 @@ We can understand boosted decision tree
based transformation as a supervised feature encoding that
converts a real-valued vector into a compact binary-valued
vector. A traversal from root node to a leaf node represents
a rule on certain features.}"
a rule on certain features."
}
\examples{
data(agaricus.train, package='xgboost')

View File

@ -241,7 +241,7 @@ Therefore, according to our findings, getting a placebo doesn't seem to help but
All these things are nice, but it would be even better to plot the results.
```{r, fig.width=8, fig.height=5, fig.align='center'}
xgb.plot.importance(importance_matrix = importanceRaw)
xgb.plot.importance(importance_matrix = importance)
```
Features have automatically been divided into 2 clusters: the interesting features... and the others.

View File

@ -164,7 +164,7 @@ dtest <- xgb.DMatrix(test$data, label = test$label)
watchlist <- list(eval = dtest, train = dtrain)
param <- list(max_depth = 2, eta = 1, silent = 1)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror)
bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
@
The gradient and second order gradient is required for the output of customized