consolidated DMatrix&Booster stuff into xgb.DMatrix.R & xgb.Booster.R
This commit is contained in:
parent
1d504d6c6c
commit
d27bfb61b0
@ -1,55 +0,0 @@
|
||||
# Declare the S4 class name 'xgb.DMatrix' (no slots are given here) so that it
# can be used as a signature in the setMethod() calls for this class.
setClass('xgb.DMatrix')
|
||||
|
||||
#' Get information of an xgb.DMatrix object
|
||||
#'
|
||||
#' Get information of an xgb.DMatrix object
|
||||
#'
|
||||
#' The information can be one of the following:
|
||||
#'
|
||||
#' \itemize{
|
||||
#' \item \code{label}: the label XGBoost learns from;
|
||||
#' \item \code{weight}: to do a weight rescale ;
|
||||
#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ;
|
||||
#' \item \code{nrow}: number of rows of the \code{xgb.DMatrix}.
|
||||
#' }
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#' labels <- getinfo(dtrain, 'label')
|
||||
#' setinfo(dtrain, 'label', 1-labels)
|
||||
#' labels2 <- getinfo(dtrain, 'label')
|
||||
#' stopifnot(all(labels2 == 1-labels))
|
||||
#' @rdname getinfo
|
||||
#' @export
|
||||
# Generic entry point: the concrete implementation is selected by the class
# of `object`; any further arguments are handed to that implementation.
getinfo <- function(object, ...) UseMethod("getinfo")
|
||||
|
||||
|
||||
|
||||
#' @param object Object of class \code{xgb.DMatrix}
|
||||
#' @param name the name of the field to get
|
||||
#' @param ... other parameters
|
||||
#' @rdname getinfo
|
||||
#' @method getinfo xgb.DMatrix
|
||||
setMethod("getinfo", signature = "xgb.DMatrix",
          definition = function(object, name) {
            # `name` must be exactly one string: the comparisons below are
            # scalar and would fail on a vector of names.
            if (!is.character(name) || length(name) != 1L) {
              stop("xgb.getinfo: name must be character")
            }
            # inherits() stays a scalar test even when the object carries
            # more than one class.
            if (!inherits(object, "xgb.DMatrix")) {
              stop("xgb.getinfo: first argument dtrain must be xgb.DMatrix")
            }
            # Only these four fields can be queried.
            if (!(name %in% c("label", "weight", "base_margin", "nrow"))) {
              stop(paste("xgb.getinfo: unknown info name", name))
            }
            # "nrow" is answered by xgb.numrow(); every other field is
            # fetched through the native XGDMatrixGetInfo_R routine.
            if (name == "nrow") {
              ret <- xgb.numrow(object)
            } else {
              ret <- .Call("XGDMatrixGetInfo_R", object, name,
                           PACKAGE = "xgboost")
            }
            ret
          })
|
||||
@ -1,19 +0,0 @@
|
||||
# Turn the existing nrow() function into an S4 generic so that a method for
# 'xgb.DMatrix' can be registered with setMethod().
setGeneric("nrow")
|
||||
|
||||
#' @title Number of xgb.DMatrix rows
|
||||
#' @description \code{nrow} returns the number of rows present in the \code{xgb.DMatrix}.
|
||||
#' @param x Object of class \code{xgb.DMatrix}
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#' stopifnot(nrow(dtrain) == nrow(train$data))
|
||||
#'
|
||||
#' @export
|
||||
# nrow() for xgb.DMatrix simply delegates to xgb.numrow().
setMethod("nrow", signature = "xgb.DMatrix",
          definition = function(x) xgb.numrow(x))
|
||||
@ -1,80 +0,0 @@
|
||||
# Class name used for the booster handle; declared without any slots.
setClass("xgb.Booster.handle")

# Booster class with two slots: the handle and a raw vector.
setClass(
  "xgb.Booster",
  slots = list(
    handle = "xgb.Booster.handle",
    raw = "raw"
  )
)
|
||||
|
||||
#' Predict method for eXtreme Gradient Boosting model
|
||||
#'
|
||||
#' Predicted values based on xgboost model object.
|
||||
#'
|
||||
#' @param object Object of class "xgb.Booster"
|
||||
#' @param newdata takes \code{matrix}, \code{dgCMatrix}, local data file or
|
||||
#' \code{xgb.DMatrix}.
|
||||
#' @param missing Missing is only used when input is dense matrix, pick a float
|
||||
#' value that represents a missing value. Some data sets use 0 or another extreme value to represent missing values.
|
||||
#' @param outputmargin whether the prediction should be shown in the original
|
||||
#' value of sum of functions, when outputmargin=TRUE, the prediction is
|
||||
#' untransformed margin value. In logistic regression, outputmargin=TRUE will
|
||||
#' output value before logistic transformation.
|
||||
#' @param ntreelimit limit number of trees used in prediction, this parameter is
|
||||
#' only valid for gbtree, but not for gblinear. set it to be value bigger
|
||||
#' than 0. It will use all trees by default.
|
||||
#' @param predleaf whether to predict the leaf index instead. If set to TRUE, the output will be a matrix object.
|
||||
#'
|
||||
#' @details
|
||||
#' The option \code{ntreelimit} purpose is to let the user train a model with lots
|
||||
#' of trees but use only the first trees for prediction to avoid overfitting
|
||||
#' (without having to train a new model with less trees).
|
||||
#'
|
||||
#' The option \code{predleaf} purpose is inspired from §3.1 of the paper
|
||||
#' \code{Practical Lessons from Predicting Clicks on Ads at Facebook}.
|
||||
#' The idea is to use the model as a generator of new features which capture non linear link
|
||||
#' from original features.
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' test <- agaricus.test
|
||||
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
||||
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
|
||||
#' pred <- predict(bst, test$data)
|
||||
#' @export
|
||||
setMethod("predict", signature = "xgb.Booster",
          definition = function(object, newdata, missing = NA,
                                outputmargin = FALSE, ntreelimit = NULL,
                                predleaf = FALSE) {
            # inherits() is a scalar test regardless of how many classes the
            # object carries; comparing class() with != is not.
            if (!inherits(object, "xgb.Booster")) {
              stop("predict: model in prediction must be of class xgb.Booster")
            }
            object <- xgb.Booster.check(object, saveraw = FALSE)

            # Anything that is not already an xgb.DMatrix is converted first.
            # A plain matrix has class c("matrix", "array"), so the former
            # `class(newdata) != "xgb.DMatrix"` produced a length-2 condition.
            if (!inherits(newdata, "xgb.DMatrix")) {
              newdata <- xgb.DMatrix(newdata, missing = missing)
            }

            # NULL is translated to 0 before the native call; an explicit
            # value must be at least 1.
            if (is.null(ntreelimit)) {
              ntreelimit <- 0
            } else if (ntreelimit < 1) {
              stop("predict: ntreelimit must be equal to or greater than 1")
            }

            # Encode the two switches in a single integer:
            # +1 for outputmargin, +2 for predleaf.
            option <- 0
            if (outputmargin) {
              option <- option + 1
            }
            if (predleaf) {
              option <- option + 2
            }

            ret <- .Call("XGBoosterPredict_R", object$handle, newdata,
                         as.integer(option), as.integer(ntreelimit),
                         PACKAGE = "xgboost")

            if (predleaf) {
              # Leaf predictions are always returned as a matrix with one
              # row per row of `newdata`.
              len <- getinfo(newdata, "nrow")
              if (length(ret) == len) {
                ret <- matrix(ret, ncol = 1)
              } else {
                # Fill with `len` columns, then transpose so that rows
                # correspond to rows of `newdata`.
                ret <- t(matrix(ret, ncol = len))
              }
            }
            ret
          })
|
||||
@ -1,18 +0,0 @@
|
||||
#' Predict method for eXtreme Gradient Boosting model handle
|
||||
#'
|
||||
#' Predicted values based on xgb.Booster.handle object.
|
||||
#'
|
||||
#' @param object Object of class "xgb.Booster.handle"
|
||||
#' @param ... Parameters passed to \code{predict.xgb.Booster}
|
||||
#'
|
||||
setMethod("predict", signature = "xgb.Booster.handle",
          definition = function(object, ...) {
            # inherits() remains a scalar test even if the handle carries
            # more than one class.
            if (!inherits(object, "xgb.Booster.handle")) {
              stop("predict: model in prediction must be of class xgb.Booster.handle")
            }

            # Convert the handle with xgb.handleToBooster() and forward all
            # remaining arguments to predict() on the result.
            bst <- xgb.handleToBooster(object)
            predict(bst, ...)
          })
|
||||
@ -1,37 +0,0 @@
|
||||
#' Set information of an xgb.DMatrix object
|
||||
#'
|
||||
#' Set information of an xgb.DMatrix object
|
||||
#'
|
||||
#' It can be one of the following:
|
||||
#'
|
||||
#' \itemize{
|
||||
#' \item \code{label}: label Xgboost learn from ;
|
||||
#' \item \code{weight}: to do a weight rescale ;
|
||||
#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ;
|
||||
#' \item \code{group}.
|
||||
#' }
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#' labels <- getinfo(dtrain, 'label')
|
||||
#' setinfo(dtrain, 'label', 1-labels)
|
||||
#' labels2 <- getinfo(dtrain, 'label')
|
||||
#' stopifnot(all(labels2 == 1-labels))
|
||||
#' @rdname setinfo
|
||||
#' @export
|
||||
# Generic entry point: the concrete implementation is selected by the class
# of `object`; any further arguments are handed to that implementation.
setinfo <- function(object, ...) UseMethod("setinfo")
|
||||
|
||||
#' @param object Object of class "xgb.DMatrix"
|
||||
#' @param name the name of the field to set
|
||||
#' @param info the specific field of information to set
|
||||
#' @param ... other parameters
|
||||
#' @rdname setinfo
|
||||
#' @method setinfo xgb.DMatrix
|
||||
# setinfo() for xgb.DMatrix forwards its three arguments, unchanged and in
# order, to xgb.setinfo().
setMethod("setinfo", signature = "xgb.DMatrix",
          definition = function(object, name, info) xgb.setinfo(object, name, info))
|
||||
@ -1,44 +0,0 @@
|
||||
# Declare the S4 class name 'xgb.DMatrix' (no slots are given here) so that it
# can be used as a signature in the setMethod() call for slice().
setClass('xgb.DMatrix')
|
||||
|
||||
#' Get a new DMatrix containing the specified rows of
|
||||
#' original xgb.DMatrix object
|
||||
#'
|
||||
#' Get a new DMatrix containing the specified rows of
|
||||
#' original xgb.DMatrix object
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#' dsub <- slice(dtrain, 1:3)
|
||||
#' @rdname slice
|
||||
#' @export
|
||||
# Generic entry point: the concrete implementation is selected by the class
# of `object`; any further arguments are handed to that implementation.
slice <- function(object, ...) UseMethod("slice")
|
||||
|
||||
#' @param object Object of class "xgb.DMatrix"
|
||||
#' @param idxset an integer vector of indices of rows needed
|
||||
#' @param ... other parameters
|
||||
#' @rdname slice
|
||||
#' @method slice xgb.DMatrix
|
||||
setMethod("slice", signature = "xgb.DMatrix",
          definition = function(object, idxset, ...) {
            # inherits() is a scalar test regardless of how many classes the
            # object carries.
            if (!inherits(object, "xgb.DMatrix")) {
              stop("slice: first argument dtrain must be xgb.DMatrix")
            }
            ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset,
                         PACKAGE = "xgboost")

            # Carry over every attribute whose length equals the number of
            # rows of the source matrix, subset by the same indices.
            attr_list <- attributes(object)
            nr <- xgb.numrow(object)
            # vapply() always yields a numeric vector, even when there are
            # no attributes at all.
            len <- vapply(attr_list, length, numeric(1))
            for (nm in names(attr_list)[which(len == nr)]) {
              attr(ret, nm) <- attr(object, nm)[idxset]
            }
            structure(ret, class = "xgb.DMatrix")
          })
|
||||
@ -1,23 +0,0 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/nrow.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
\name{nrow,xgb.DMatrix-method}
|
||||
\alias{nrow,xgb.DMatrix-method}
|
||||
\title{Number of xgb.DMatrix rows}
|
||||
\usage{
|
||||
\S4method{nrow}{xgb.DMatrix}(x)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{Object of class \code{xgb.DMatrix}}
|
||||
}
|
||||
\description{
|
||||
\code{nrow} return the number of rows present in the \code{xgb.DMatrix}.
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
train <- agaricus.train
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
stopifnot(nrow(dtrain) == nrow(train$data))
|
||||
|
||||
}
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/predict.xgb.Booster.R
|
||||
\docType{methods}
|
||||
\name{predict,xgb.Booster-method}
|
||||
\alias{predict,xgb.Booster-method}
|
||||
\title{Predict method for eXtreme Gradient Boosting model}
|
||||
\usage{
|
||||
\S4method{predict}{xgb.Booster}(object, newdata, missing = NA,
|
||||
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.Boost"}
|
||||
|
||||
\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or
|
||||
\code{xgb.DMatrix}.}
|
||||
|
||||
\item{missing}{Missing is only used when input is dense matrix, pick a float
|
||||
value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.}
|
||||
|
||||
\item{outputmargin}{whether the prediction should be shown in the original
|
||||
value of sum of functions, when outputmargin=TRUE, the prediction is
|
||||
untransformed margin value. In logistic regression, outputmargin=T will
|
||||
output value before logistic transformation.}
|
||||
|
||||
\item{ntreelimit}{limit number of trees used in prediction, this parameter is
|
||||
only valid for gbtree, but not for gblinear. set it to be value bigger
|
||||
than 0. It will use all trees by default.}
|
||||
|
||||
\item{predleaf}{whether predict leaf index instead. If set to TRUE, the output will be a matrix object.}
|
||||
}
|
||||
\description{
|
||||
Predicted values based on xgboost model object.
|
||||
}
|
||||
\details{
|
||||
The option \code{ntreelimit} purpose is to let the user train a model with lots
|
||||
of trees but use only the first trees for prediction to avoid overfitting
|
||||
(without having to train a new model with less trees).
|
||||
|
||||
The option \code{predleaf} purpose is inspired from §3.1 of the paper
|
||||
\code{Practical Lessons from Predicting Clicks on Ads at Facebook}.
|
||||
The idea is to use the model as a generator of new features which capture non linear link
|
||||
from original features.
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
||||
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
|
||||
pred <- predict(bst, test$data)
|
||||
}
|
||||
|
||||
@ -1,18 +0,0 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/predict.xgb.Booster.handle.R
|
||||
\docType{methods}
|
||||
\name{predict,xgb.Booster.handle-method}
|
||||
\alias{predict,xgb.Booster.handle-method}
|
||||
\title{Predict method for eXtreme Gradient Boosting model handle}
|
||||
\usage{
|
||||
\S4method{predict}{xgb.Booster.handle}(object, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.Boost.handle"}
|
||||
|
||||
\item{...}{Parameters pass to \code{predict.xgb.Booster}}
|
||||
}
|
||||
\description{
|
||||
Predicted values based on xgb.Booster.handle object.
|
||||
}
|
||||
|
||||
@ -1,31 +0,0 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/slice.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
\name{slice}
|
||||
\alias{slice}
|
||||
\alias{slice,xgb.DMatrix-method}
|
||||
\title{Get a new DMatrix containing the specified rows of
|
||||
orginal xgb.DMatrix object}
|
||||
\usage{
|
||||
slice(object, ...)
|
||||
|
||||
\S4method{slice}{xgb.DMatrix}(object, idxset, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.DMatrix"}
|
||||
|
||||
\item{...}{other parameters}
|
||||
|
||||
\item{idxset}{a integer vector of indices of rows needed}
|
||||
}
|
||||
\description{
|
||||
Get a new DMatrix containing the specified rows of
|
||||
orginal xgb.DMatrix object
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
train <- agaricus.train
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
dsub <- slice(dtrain, 1:3)
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user