[R-package] various fixes for R CMD check (#1328)

* [R] fix xgb.create.features
* [R] fixes for R CMD check

commit 11efa038bd (parent f8d23b97be)
@@ -20,6 +20,7 @@ BugReports: https://github.com/dmlc/xgboost/issues
 VignetteBuilder: knitr
 Suggests:
     knitr,
     rmarkdown,
     ggplot2 (>= 1.0.1),
     DiagrammeR (>= 0.8.1),
     Ckmeans.1d.dp (>= 3.3.1),
@@ -7,6 +7,9 @@ S3method(dimnames,xgb.DMatrix)
 S3method(getinfo,xgb.DMatrix)
 S3method(predict,xgb.Booster)
 S3method(predict,xgb.Booster.handle)
+S3method(print,xgb.Booster)
+S3method(print,xgb.DMatrix)
+S3method(print,xgb.cv.synchronous)
 S3method(setinfo,xgb.DMatrix)
 S3method(slice,xgb.DMatrix)
 export("xgb.attr<-")
@@ -19,9 +22,6 @@ export(cb.print.evaluation)
 export(cb.reset.parameters)
 export(cb.save.model)
 export(getinfo)
-export(print.xgb.Booster)
-export(print.xgb.DMatrix)
-export(print.xgb.cv.synchronous)
 export(setinfo)
 export(slice)
 export(xgb.DMatrix)
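Together, these two NAMESPACE hunks convert the three print methods from plain `export()` entries into `S3method()` registrations, which is what `R CMD check` expects for S3 methods. With roxygen2 this comes from tagging the method; a minimal sketch with a hypothetical class `myclass` (not part of this commit):

```r
# Hypothetical roxygen2 pattern: @method together with @export makes
# roxygen2 emit S3method(print, myclass) in NAMESPACE instead of
# export(print.myclass).

#' Print a myclass object
#'
#' @param x a myclass object
#' @param ... not used
#' @method print myclass
#' @export
print.myclass <- function(x, ...) {
  cat("myclass object with", length(x), "element(s)\n")
  invisible(x)
}
```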
@@ -55,10 +55,14 @@ importFrom(data.table,data.table)
 importFrom(data.table,rbindlist)
 importFrom(data.table,setnames)
 importFrom(magrittr,"%>%")
+importFrom(stats,predict)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
 importFrom(stringr,str_match)
 importFrom(stringr,str_replace)
 importFrom(stringr,str_replace_all)
 importFrom(stringr,str_split)
+importFrom(utils,object.size)
+importFrom(utils,str)
+importFrom(utils,tail)
 useDynLib(xgboost)
@@ -178,7 +178,7 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) {
 #' @rdname predict.xgb.Booster
 #' @export
 predict.xgb.Booster <- function(object, newdata, missing = NA,
-        outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE) {
+        outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) {

   object <- xgb.Booster.check(object, saveraw = FALSE)
   if (class(newdata) != "xgb.DMatrix")
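The extra `...` satisfies `R CMD check`'s S3 generic/method consistency test: because `stats::predict` is declared as `predict(object, ...)`, every method must also accept `...`. A toy illustration with a hypothetical class (an assumption, not xgboost code):

```r
# stats::predict is predict(object, ...), so a method's formals must end
# with ... after its own named arguments, or R CMD check warns.
predict.toyclass <- function(object, newdata, missing = NA, ...) {
  # stub: a real method would score newdata here
  rep(0, NROW(newdata))
}
```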
@@ -245,7 +245,7 @@ predict.xgb.Booster.handle <- function(object, ...) {
 #' and its serialization is handled extrnally.
 #' Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
 #' change the value of that parameter for a model.
-#' Use \code{\link{`xgb.parameters<-`}} to set or change model parameters.
+#' Use \code{\link{xgb.parameters<-}} to set or change model parameters.
 #'
 #' The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
 #' than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.
@@ -414,6 +414,7 @@ xgb.ntree <- function(bst) {
 #' print(bst)
 #' print(bst, verbose=TRUE)
 #'
+#' @method print xgb.Booster
 #' @export
 print.xgb.Booster <- function(x, verbose=FALSE, ...) {
   cat('##### xgb.Booster\n')
@@ -186,7 +186,7 @@ getinfo <- function(object, ...) UseMethod("getinfo")

 #' @rdname getinfo
 #' @export
-getinfo.xgb.DMatrix <- function(object, name) {
+getinfo.xgb.DMatrix <- function(object, name, ...) {
   if (typeof(name) != "character" ||
       length(name) != 1 ||
       !name %in% c('label', 'weight', 'base_margin', 'nrow')) {
@@ -237,7 +237,7 @@ setinfo <- function(object, ...) UseMethod("setinfo")

 #' @rdname setinfo
 #' @export
-setinfo.xgb.DMatrix <- function(object, name, info) {
+setinfo.xgb.DMatrix <- function(object, name, info, ...) {
   if (name == "label") {
     if (length(info) != nrow(object))
       stop("The length of labels must equal to the number of rows in the input data")
@@ -341,6 +341,8 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
 #'
 #' dtrain
+#' print(dtrain, verbose=TRUE)
 #'
+#' @method print xgb.DMatrix
 #' @export
 print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
   cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ')
@@ -14,7 +14,7 @@
 #' \strong{Practical Lessons from Predicting Clicks on Ads at Facebook}
 #'
 #' \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
-#' Joaquin Quiñonero Candela)}
+#' Joaquin Quinonero Candela)}
 #'
 #' International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
 #'
@@ -22,7 +22,7 @@
 #'
 #' Extract explaining the method:
 #'
-#' "\emph{We found that boosted decision trees are a powerful and very
+#' "We found that boosted decision trees are a powerful and very
 #' convenient way to implement non-linear and tuple transformations
 #' of the kind we just described. We treat each individual
 #' tree as a categorical feature that takes as value the
@@ -43,7 +43,7 @@
 #' based transformation as a supervised feature encoding that
 #' converts a real-valued vector into a compact binary-valued
 #' vector. A traversal from root node to a leaf node represents
-#' a rule on certain features.}"
+#' a rule on certain features."
 #'
 #' @examples
 #' data(agaricus.train, package='xgboost')
@@ -78,12 +78,7 @@
 #' @export
 xgb.create.features <- function(model, data, ...){
   check.deprecation(...)
-  pred_with_leaf = predict(model, data, predleaf = TRUE)
-  cols <- list()
-  for(i in 1:length(trees)){
-    # max is not the real max but it s not important for the purpose of adding features
-    leaf_id <- sort(unique(pred_with_leaf[,i]))
-    cols[[i]] <- factor(x = pred_with_leaf[,i], level = leaf_id)
-  }
-  cBind(data, sparse.model.matrix( ~ . -1, as.data.frame(cols)))
+  pred_with_leaf <- predict(model, data, predleaf = TRUE)
+  cols <- lapply(as.data.frame(pred_with_leaf), factor)
+  cBind(data, sparse.model.matrix( ~ . -1, cols))
 }
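The old loop indexed an undefined `trees` object (the bug this commit fixes) and rebuilt each factor by hand; the replacement converts every column of the leaf-index matrix in one `lapply()`. A self-contained sketch of that encoding, with a toy matrix standing in for `predict(..., predleaf = TRUE)` output:

```r
library(Matrix)

# Toy stand-in for predict(model, data, predleaf = TRUE):
# one column of leaf indices per boosted tree.
pred_with_leaf <- matrix(c(1, 3, 3, 2, 5, 2), nrow = 3,
                         dimnames = list(NULL, c("tree1", "tree2")))

# Each column becomes a factor, so every distinct leaf is a level ...
cols <- lapply(as.data.frame(pred_with_leaf), factor)

# ... and ~ . - 1 one-hot encodes each level as its own binary column.
sparse.model.matrix(~ . - 1, data = as.data.frame(cols))
```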
@@ -171,7 +171,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
   # CV-predictions callback
   if (prediction &&
       !has.callbacks(callbacks, 'cb.cv.predict')) {
-    callbacks <- add.cb(callbacks, cb.cv.predict(save_model=FALSE))
+    callbacks <- add.cb(callbacks, cb.cv.predict(save_models=FALSE))
   }
   # Sort the callbacks into categories
   cb <- categorize.callbacks(callbacks)
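The callback's real argument is `save_models` (plural); the old call passed a nonexistent `save_model`. A hedged usage sketch, assuming a `dtrain` DMatrix with binary labels already exists:

```r
# Assumes dtrain is an existing xgb.DMatrix with 0/1 labels.
cv <- xgb.cv(params = list(objective = "binary:logistic", max_depth = 2),
             data = dtrain, nrounds = 5, nfold = 3, prediction = TRUE,
             callbacks = list(cb.cv.predict(save_models = TRUE)))
```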
@@ -253,6 +253,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
 #' print(cv, verbose=TRUE)
 #'
 #' @rdname print.xgb.cv
+#' @method print xgb.cv.synchronous
 #' @export
 print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) {
   cat('##### xgb.cv ', length(x$folds), '-folds\n', sep='')
@@ -103,4 +103,4 @@ xgb.importance <- function(feature_names = NULL, model = NULL, data = NULL, labe
 # Avoid error messages during CRAN check.
 # The reason is that these variables are never declared
 # They are mainly column names inferred by Data.table...
-globalVariables(c(".", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover"))
+globalVariables(c(".", ".N", "Gain", "Frequency", "Feature", "Split", "No", "Missing", "MissingNo", "RealCover"))
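`utils::globalVariables()` is how the package silences `R CMD check`'s "no visible binding for global variable" NOTEs for column names that data.table evaluates non-standardly; the lists in this commit simply grow to cover newly flagged names. The pattern as it appears inside a package's R/ sources (hypothetical names, data.table assumed imported):

```r
# Declare NSE column names once so R CMD check stops emitting
# "no visible binding for global variable" NOTEs about them.
utils::globalVariables(c(".", ".N", "Feature", "Gain"))

count_by_feature <- function(dt) {
  # Feature and .N would otherwise look like undefined globals here.
  dt[, .N, by = Feature]
}
```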
@@ -2,6 +2,7 @@
 #'
 #' Plot multiple graph aligned by rows and columns.
 #'
 #' @param ... the plots
+#' @param cols number of columns
 #' @return NULL
 multiplot <- function(..., cols = 1) {
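`R CMD check` reports "Undocumented arguments in documentation object" when a formal such as `cols` has no `@param` tag, which is why the tag is added here and the generated `\item` appears in the Rd hunk further down. The shape of the fix, sketched on a hypothetical stub:

```r
# Sketch: R CMD check warns about undocumented arguments unless every
# formal has a matching @param tag in the roxygen block.

#' Lay out several plots in a grid
#'
#' @param ... the plots
#' @param cols number of columns
#' @return NULL
multiplot_stub <- function(..., cols = 1) {
  # stub only; the real multiplot arranges grid viewports
  invisible(NULL)
}
```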
@@ -40,7 +41,7 @@ edge.parser <- function(element) {
 }

 #' Extract path from root to leaf from data.table
-#' @param dt.tree data.table containing the nodes and edges of the trees
+#' @param dt_tree data.table containing the nodes and edges of the trees
 get.paths.to.leaf <- function(dt_tree) {
   dt.not.leaf.edges <-
     dt_tree[Feature != "Leaf",.(ID, Yes, Tree)] %>% list(dt_tree[Feature != "Leaf",.(ID, No, Tree)]) %>% rbindlist(use.names = F)
@@ -149,6 +150,6 @@ xgb.plot.deepness <- function(model = NULL) {
 # They are mainly column names inferred by Data.table...
 globalVariables(
   c(
-    "Feature", "Count", "ggplot", "aes", "geom_bar", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "ID", "Yes", "No", "Tree"
+    ".N", "N", "size", "Feature", "Count", "ggplot", "aes", "geom_bar", "xlab", "ylab", "ggtitle", "theme", "element_blank", "element_text", "ID", "Yes", "No", "Tree"
   )
 )
@@ -103,6 +103,6 @@ xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5,

 globalVariables(
   c(
-    "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
+    ".N", "N", "From", "To", "Text", "Feature", "no.nodes.abs.pos", "ID", "Yes", "No", "Tree", "yes.nodes.abs.pos", "abs.node.position"
   )
 )
@@ -95,6 +95,8 @@ NULL
 #' @importFrom stringr str_replace
 #' @importFrom stringr str_replace_all
 #' @importFrom stringr str_split
+#' @importFrom utils object.size str tail
+#' @importFrom stats predict
 #'
 #' @import methods
 #' @useDynLib xgboost
@@ -7,7 +7,7 @@
 get.paths.to.leaf(dt_tree)
 }
 \arguments{
-\item{dt.tree}{data.table containing the nodes and edges of the trees}
+\item{dt_tree}{data.table containing the nodes and edges of the trees}
 }
 \description{
 Extract path from root to leaf from data.table
@@ -7,7 +7,7 @@
 \usage{
 getinfo(object, ...)

-\method{getinfo}{xgb.DMatrix}(object, name)
+\method{getinfo}{xgb.DMatrix}(object, name, ...)
 }
 \arguments{
 \item{object}{Object of class \code{xgb.DMatrix}}
@@ -7,6 +7,8 @@
 multiplot(..., cols = 1)
 }
 \arguments{
 \item{...}{the plots}
+
+\item{cols}{number of columns}
 }
 \description{
@@ -7,7 +7,7 @@
 \usage{
 \method{predict}{xgb.Booster}(object, newdata, missing = NA,
   outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE,
-  reshape = FALSE)
+  reshape = FALSE, ...)

 \method{predict}{xgb.Booster.handle}(object, ...)
 }
@@ -4,7 +4,7 @@
 \alias{print.xgb.Booster}
 \title{Print xgb.Booster}
 \usage{
-print.xgb.Booster(x, verbose = FALSE, ...)
+\method{print}{xgb.Booster}(x, verbose = FALSE, ...)
 }
 \arguments{
 \item{x}{an xgb.Booster object}
@@ -4,7 +4,7 @@
 \alias{print.xgb.DMatrix}
 \title{Print xgb.DMatrix}
 \usage{
-print.xgb.DMatrix(x, verbose = FALSE, ...)
+\method{print}{xgb.DMatrix}(x, verbose = FALSE, ...)
 }
 \arguments{
 \item{x}{an xgb.DMatrix object}
@@ -24,5 +24,6 @@ dtrain <- xgb.DMatrix(train$data, label=train$label)

 dtrain
+print(dtrain, verbose=TRUE)

 }
@@ -4,7 +4,7 @@
 \alias{print.xgb.cv.synchronous}
 \title{Print xgb.cv result}
 \usage{
-print.xgb.cv.synchronous(x, verbose = FALSE, ...)
+\method{print}{xgb.cv.synchronous}(x, verbose = FALSE, ...)
 }
 \arguments{
 \item{x}{an \code{xgb.cv.synchronous} object}
@@ -7,7 +7,7 @@
 \usage{
 setinfo(object, ...)

-\method{setinfo}{xgb.DMatrix}(object, name, info)
+\method{setinfo}{xgb.DMatrix}(object, name, info, ...)
 }
 \arguments{
 \item{object}{Object of class "xgb.DMatrix"}
@@ -48,7 +48,7 @@ would not be saved by \code{xgb.save} because an xgboost model is an external me
 and its serialization is handled extrnally.
 Also, setting an attribute that has the same name as one of xgboost's parameters wouldn't
 change the value of that parameter for a model.
-Use \code{\link{`xgb.parameters<-`}} to set or change model parameters.
+Use \code{\link{xgb.parameters<-}} to set or change model parameters.

 The attribute setters would usually work more efficiently for \code{xgb.Booster.handle}
 than for \code{xgb.Booster}, since only just a handle (pointer) would need to be copied.
@@ -25,7 +25,7 @@ This is the function inspired from the paragraph 3.1 of the paper:
 \strong{Practical Lessons from Predicting Clicks on Ads at Facebook}

 \emph{(Xinran He, Junfeng Pan, Ou Jin, Tianbing Xu, Bo Liu, Tao Xu, Yan, xin Shi, Antoine Atallah, Ralf Herbrich, Stuart Bowers,
-Joaquin Quiñonero Candela)}
+Joaquin Quinonero Candela)}

 International Workshop on Data Mining for Online Advertising (ADKDD) - August 24, 2014
@@ -33,7 +33,7 @@ International Workshop on Data Mining for Online Advertising (ADKDD) - August 24

 Extract explaining the method:

-"\emph{We found that boosted decision trees are a powerful and very
+"We found that boosted decision trees are a powerful and very
 convenient way to implement non-linear and tuple transformations
 of the kind we just described. We treat each individual
 tree as a categorical feature that takes as value the
@@ -54,7 +54,7 @@ We can understand boosted decision tree
 based transformation as a supervised feature encoding that
 converts a real-valued vector into a compact binary-valued
 vector. A traversal from root node to a leaf node represents
-a rule on certain features.}"
+a rule on certain features."
 }
 \examples{
 data(agaricus.train, package='xgboost')
@@ -241,7 +241,7 @@ Therefore, according to our findings, getting a placebo doesn't seem to help but
 All these things are nice, but it would be even better to plot the results.

 ```{r, fig.width=8, fig.height=5, fig.align='center'}
-xgb.plot.importance(importance_matrix = importanceRaw)
+xgb.plot.importance(importance_matrix = importance)
 ```

 Feature have automatically been divided in 2 clusters: the interesting features... and the others.
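The chunk now plots the processed `importance` table instead of the raw one. For context, a hedged sketch of how such a table is typically produced earlier in the vignette (the names `bst` and `sparse_matrix` are assumptions):

```r
# Assumed from earlier in the vignette: bst is the trained booster and
# sparse_matrix the one-hot encoded training matrix.
importance <- xgb.importance(feature_names = colnames(sparse_matrix),
                             model = bst)
xgb.plot.importance(importance_matrix = importance)
```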
@@ -164,7 +164,7 @@ dtest <- xgb.DMatrix(test$data, label = test$label)
 watchlist <- list(eval = dtest, train = dtrain)
 param <- list(max_depth = 2, eta = 1, silent = 1)

-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror)
+bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, logregobj, evalerror, maximize = FALSE)
 @

 The gradient and second order gradient is required for the output of customized
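`maximize = FALSE` is needed because with a custom evaluation function xgboost cannot infer whether larger metric values are better. A sketch of the objective/metric pair this chunk relies on, consistent with the surrounding vignette but not copied verbatim:

```r
# Custom logistic objective: gradient and hessian of the log loss.
logregobj <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  preds <- 1 / (1 + exp(-preds))
  list(grad = preds - labels, hess = preds * (1 - preds))
}

# Custom metric: classification error; lower is better, hence
# maximize = FALSE in the xgb.train() call above.
evalerror <- function(preds, dtrain) {
  labels <- getinfo(dtrain, "label")
  list(metric = "error", value = mean(as.numeric(preds > 0) != labels))
}

bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
                 obj = logregobj, feval = evalerror, maximize = FALSE)
```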