consolidated DMatrix&Booster stuff into xgb.DMatrix.R & xgb.Booster.R
This commit is contained in:
parent
1d504d6c6c
commit
d27bfb61b0
@ -1,55 +0,0 @@
|
|||||||
# Declare "xgb.DMatrix" to the S4 system (no slots are given here) so that
# setMethod() calls can name it in a signature.
setClass('xgb.DMatrix')
|
|
||||||
|
|
||||||
#' Get information of an xgb.DMatrix object
|
|
||||||
#'
|
|
||||||
#' Get information of an xgb.DMatrix object
|
|
||||||
#'
|
|
||||||
#' The information can be one of the following:
|
|
||||||
#'
|
|
||||||
#' \itemize{
|
|
||||||
#' \item \code{label}: the label XGBoost learns from;
|
|
||||||
#' \item \code{weight}: to do a weight rescale ;
|
|
||||||
#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ;
|
|
||||||
#' \item \code{nrow}: number of rows of the \code{xgb.DMatrix}.
|
|
||||||
#' }
|
|
||||||
#'
|
|
||||||
#' @examples
|
|
||||||
#' data(agaricus.train, package='xgboost')
|
|
||||||
#' train <- agaricus.train
|
|
||||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
|
||||||
#' labels <- getinfo(dtrain, 'label')
|
|
||||||
#' setinfo(dtrain, 'label', 1-labels)
|
|
||||||
#' labels2 <- getinfo(dtrain, 'label')
|
|
||||||
#' stopifnot(all(labels2 == 1-labels))
|
|
||||||
#' @rdname getinfo
|
|
||||||
#' @export
|
|
||||||
getinfo <- function(object, ...) {
  # Generic entry point: the implementation is chosen from the class of
  # `object`.
  UseMethod("getinfo")
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#' @param object Object of class \code{xgb.DMatrix}
|
|
||||||
#' @param name the name of the field to get
|
|
||||||
#' @param ... other parameters
|
|
||||||
#' @rdname getinfo
|
|
||||||
#' @method getinfo xgb.DMatrix
|
|
||||||
setMethod("getinfo", signature = "xgb.DMatrix",
          definition = function(object, name) {
            # `name` has to be one character string naming the field to read.
            if (!is.character(name) || length(name) != 1L) {
              stop("xgb.getinfo: name must be character")
            }
            # inherits() yields a single TRUE/FALSE even when the object
            # carries more than one class.
            if (!inherits(object, "xgb.DMatrix")) {
              stop("xgb.getinfo: first argument dtrain must be xgb.DMatrix")
            }
            known_fields <- c("label", "weight", "base_margin", "nrow")
            if (!name %in% known_fields) {
              stop(paste("xgb.getinfo: unknown info name", name))
            }
            # "nrow" is answered by xgb.numrow(); every other field is read
            # through the native XGDMatrixGetInfo_R routine.
            if (name == "nrow") {
              ret <- xgb.numrow(object)
            } else {
              ret <- .Call("XGDMatrixGetInfo_R", object, name,
                           PACKAGE = "xgboost")
            }
            ret
          })
|
|
||||||
@ -1,19 +0,0 @@
|
|||||||
# Create an S4 generic named "nrow" so that a method for xgb.DMatrix can be
# attached to it with setMethod().
setGeneric("nrow")
|
|
||||||
|
|
||||||
#' @title Number of xgb.DMatrix rows
|
|
||||||
#' @description \code{nrow} returns the number of rows present in the \code{xgb.DMatrix}.
|
|
||||||
#' @param x Object of class \code{xgb.DMatrix}
|
|
||||||
#'
|
|
||||||
#' @examples
|
|
||||||
#' data(agaricus.train, package='xgboost')
|
|
||||||
#' train <- agaricus.train
|
|
||||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
|
||||||
#' stopifnot(nrow(dtrain) == nrow(train$data))
|
|
||||||
#'
|
|
||||||
#' @export
|
|
||||||
setMethod(
  "nrow",
  signature = "xgb.DMatrix",
  # The row count comes straight from the xgb.numrow() helper.
  definition = function(x) xgb.numrow(x)
)
|
|
||||||
@ -1,80 +0,0 @@
|
|||||||
# Declared without slots; used as the type of the `handle` slot below.
setClass("xgb.Booster.handle")

# "xgb.Booster" has two slots: `handle` (an xgb.Booster.handle) and `raw`
# (a raw vector).
setClass(
  "xgb.Booster",
  slots = c(
    handle = "xgb.Booster.handle",
    raw = "raw"
  )
)
|
|
||||||
|
|
||||||
#' Predict method for eXtreme Gradient Boosting model
|
|
||||||
#'
|
|
||||||
#' Predicted values based on xgboost model object.
|
|
||||||
#'
|
|
||||||
#' @param object Object of class "xgb.Booster"
|
|
||||||
#' @param newdata takes \code{matrix}, \code{dgCMatrix}, local data file or
|
|
||||||
#' \code{xgb.DMatrix}.
|
|
||||||
#' @param missing Missing is only used when input is dense matrix, pick a float
|
|
||||||
#' value that represents missing values. Sometimes a dataset uses 0 or another extreme value to represent missing values.
|
|
||||||
#' @param outputmargin whether the prediction should be shown in the original
|
|
||||||
#' value of sum of functions, when outputmargin=TRUE, the prediction is
|
|
||||||
#' untransformed margin value. In logistic regression, outputmargin=TRUE will
|
|
||||||
#' output value before logistic transformation.
|
|
||||||
#' @param ntreelimit limit number of trees used in prediction, this parameter is
|
|
||||||
#' only valid for gbtree, but not for gblinear. set it to be value bigger
|
|
||||||
#' than 0. It will use all trees by default.
|
|
||||||
#' @param predleaf whether to predict the leaf index instead. If set to TRUE, the output will be a matrix object.
|
|
||||||
#'
|
|
||||||
#' @details
|
|
||||||
#' The option \code{ntreelimit} purpose is to let the user train a model with lots
|
|
||||||
#' of trees but use only the first trees for prediction to avoid overfitting
|
|
||||||
#' (without having to train a new model with less trees).
|
|
||||||
#'
|
|
||||||
#' The option \code{predleaf} purpose is inspired from §3.1 of the paper
|
|
||||||
#' \code{Practical Lessons from Predicting Clicks on Ads at Facebook}.
|
|
||||||
#' The idea is to use the model as a generator of new features which capture non linear link
|
|
||||||
#' from original features.
|
|
||||||
#'
|
|
||||||
#' @examples
|
|
||||||
#' data(agaricus.train, package='xgboost')
|
|
||||||
#' data(agaricus.test, package='xgboost')
|
|
||||||
#' train <- agaricus.train
|
|
||||||
#' test <- agaricus.test
|
|
||||||
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
|
||||||
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
|
|
||||||
#' pred <- predict(bst, test$data)
|
|
||||||
#' @export
|
|
||||||
setMethod("predict", signature = "xgb.Booster",
          definition = function(object, newdata, missing = NA,
                                outputmargin = FALSE, ntreelimit = NULL,
                                predleaf = FALSE) {
            # inherits() returns a single TRUE/FALSE even when class() has
            # several entries.
            if (!inherits(object, "xgb.Booster")) {
              stop("predict: model in prediction must be of class xgb.Booster")
            }
            object <- xgb.Booster.check(object, saveraw = FALSE)

            # Anything that is not already an xgb.DMatrix is converted first.
            # A base matrix reports class c("matrix", "array") on R >= 4.0,
            # so comparing class() with != would hand `if` a length-two
            # condition; inherits() avoids that.
            if (!inherits(newdata, "xgb.DMatrix")) {
              newdata <- xgb.DMatrix(newdata, missing = missing)
            }

            # NULL is sent to the native routine as 0; an explicit value
            # must be at least 1.
            if (is.null(ntreelimit)) {
              ntreelimit <- 0
            } else if (ntreelimit < 1) {
              stop("predict: ntreelimit must be equal to or greater than 1")
            }

            # Option code: 1 is added for outputmargin, 2 for predleaf.
            option <- 0L
            if (outputmargin) {
              option <- option + 1L
            }
            if (predleaf) {
              option <- option + 2L
            }

            ret <- .Call("XGBoosterPredict_R", object$handle, newdata,
                         as.integer(option), as.integer(ntreelimit),
                         PACKAGE = "xgboost")

            # With predleaf the flat result is reshaped into a matrix that
            # has one row per row of `newdata`.
            if (predleaf) {
              len <- getinfo(newdata, "nrow")
              if (length(ret) == len) {
                ret <- matrix(ret, ncol = 1)
              } else {
                ret <- t(matrix(ret, ncol = len))
              }
            }
            ret
          })
|
|
||||||
@ -1,18 +0,0 @@
|
|||||||
#' Predict method for eXtreme Gradient Boosting model handle
|
|
||||||
#'
|
|
||||||
#' Predicted values based on xgb.Booster.handle object.
|
|
||||||
#'
|
|
||||||
#' @param object Object of class "xgb.Booster.handle"
|
|
||||||
#' @param ... Parameters passed to \code{predict.xgb.Booster}
|
|
||||||
#'
|
|
||||||
setMethod("predict", signature = "xgb.Booster.handle",
          definition = function(object, ...) {
            # inherits() gives a single TRUE/FALSE even for objects that
            # carry more than one class.
            if (!inherits(object, "xgb.Booster.handle")) {
              stop("predict: model in prediction must be of class xgb.Booster.handle")
            }

            # Convert the handle with xgb.handleToBooster() and run predict()
            # on the result, forwarding every extra argument.
            bst <- xgb.handleToBooster(object)
            ret <- predict(bst, ...)
            ret
          })
|
|
||||||
@ -1,37 +0,0 @@
|
|||||||
#' Set information of an xgb.DMatrix object
|
|
||||||
#'
|
|
||||||
#' Set information of an xgb.DMatrix object
|
|
||||||
#'
|
|
||||||
#' It can be one of the following:
|
|
||||||
#'
|
|
||||||
#' \itemize{
|
|
||||||
#' \item \code{label}: the label XGBoost learns from;
|
|
||||||
#' \item \code{weight}: to do a weight rescale ;
|
|
||||||
#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ;
|
|
||||||
#' \item \code{group}.
|
|
||||||
#' }
|
|
||||||
#'
|
|
||||||
#' @examples
|
|
||||||
#' data(agaricus.train, package='xgboost')
|
|
||||||
#' train <- agaricus.train
|
|
||||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
|
||||||
#' labels <- getinfo(dtrain, 'label')
|
|
||||||
#' setinfo(dtrain, 'label', 1-labels)
|
|
||||||
#' labels2 <- getinfo(dtrain, 'label')
|
|
||||||
#' stopifnot(all(labels2 == 1-labels))
|
|
||||||
#' @rdname setinfo
|
|
||||||
#' @export
|
|
||||||
setinfo <- function(object, ...) {
  # Generic entry point: the implementation is chosen from the class of
  # `object`.
  UseMethod("setinfo")
}
|
|
||||||
|
|
||||||
#' @param object Object of class "xgb.DMatrix"
|
|
||||||
#' @param name the name of the field to set
|
|
||||||
#' @param info the specific field of information to set
|
|
||||||
#' @param ... other parameters
|
|
||||||
#' @rdname setinfo
|
|
||||||
#' @method setinfo xgb.DMatrix
|
|
||||||
setMethod(
  "setinfo",
  signature = "xgb.DMatrix",
  definition = function(object, name, info) {
    # All three arguments are handed unchanged to xgb.setinfo().
    xgb.setinfo(object, name, info)
  }
)
|
|
||||||
@ -1,44 +0,0 @@
|
|||||||
# Declare "xgb.DMatrix" to the S4 system (no slots are given here) so that
# setMethod() calls can name it in a signature.
setClass('xgb.DMatrix')
|
|
||||||
|
|
||||||
#' Get a new DMatrix containing the specified rows of
|
|
||||||
#' original xgb.DMatrix object
|
|
||||||
#'
|
|
||||||
#' Get a new DMatrix containing the specified rows of
|
|
||||||
#' original xgb.DMatrix object
|
|
||||||
#'
|
|
||||||
#' @examples
|
|
||||||
#' data(agaricus.train, package='xgboost')
|
|
||||||
#' train <- agaricus.train
|
|
||||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
|
||||||
#' dsub <- slice(dtrain, 1:3)
|
|
||||||
#' @rdname slice
|
|
||||||
#' @export
|
|
||||||
slice <- function(object, ...) {
  # Generic entry point: the implementation is chosen from the class of
  # `object`.
  UseMethod("slice")
}
|
|
||||||
|
|
||||||
#' @param object Object of class "xgb.DMatrix"
|
|
||||||
#' @param idxset an integer vector of indices of rows needed
|
|
||||||
#' @param ... other parameters
|
|
||||||
#' @rdname slice
|
|
||||||
#' @method slice xgb.DMatrix
|
|
||||||
setMethod("slice", signature = "xgb.DMatrix",
          definition = function(object, idxset, ...) {
            # inherits() gives a single TRUE/FALSE even for objects that
            # carry more than one class.
            if (!inherits(object, "xgb.DMatrix")) {
              stop("slice: first argument dtrain must be xgb.DMatrix")
            }
            ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset,
                         PACKAGE = "xgboost")

            # Attributes that hold one entry per row are subset with the same
            # indices so they stay aligned with the selected rows.
            attr_list <- attributes(object)
            nr <- xgb.numrow(object)
            # vapply() always returns an integer vector, including for an
            # empty attribute list.
            attr_len <- vapply(attr_list, length, integer(1))
            per_row <- names(attr_list)[attr_len == nr]
            # "class" is assigned explicitly on the return value below, so
            # it is never copied from the source object.
            per_row <- setdiff(per_row, "class")
            for (nm in per_row) {
              # Read from the list by exact name; attr() would match
              # attribute names partially.
              attr(ret, nm) <- attr_list[[nm]][idxset]
            }
            structure(ret, class = "xgb.DMatrix")
          })
|
|
||||||
@ -1,23 +0,0 @@
|
|||||||
% Generated by roxygen2: do not edit by hand
|
|
||||||
% Please edit documentation in R/nrow.xgb.DMatrix.R
|
|
||||||
\docType{methods}
|
|
||||||
\name{nrow,xgb.DMatrix-method}
|
|
||||||
\alias{nrow,xgb.DMatrix-method}
|
|
||||||
\title{Number of xgb.DMatrix rows}
|
|
||||||
\usage{
|
|
||||||
\S4method{nrow}{xgb.DMatrix}(x)
|
|
||||||
}
|
|
||||||
\arguments{
|
|
||||||
\item{x}{Object of class \code{xgb.DMatrix}}
|
|
||||||
}
|
|
||||||
\description{
|
|
||||||
\code{nrow} return the number of rows present in the \code{xgb.DMatrix}.
|
|
||||||
}
|
|
||||||
\examples{
|
|
||||||
data(agaricus.train, package='xgboost')
|
|
||||||
train <- agaricus.train
|
|
||||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
|
||||||
stopifnot(nrow(dtrain) == nrow(train$data))
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -1,53 +0,0 @@
|
|||||||
% Generated by roxygen2: do not edit by hand
|
|
||||||
% Please edit documentation in R/predict.xgb.Booster.R
|
|
||||||
\docType{methods}
|
|
||||||
\name{predict,xgb.Booster-method}
|
|
||||||
\alias{predict,xgb.Booster-method}
|
|
||||||
\title{Predict method for eXtreme Gradient Boosting model}
|
|
||||||
\usage{
|
|
||||||
\S4method{predict}{xgb.Booster}(object, newdata, missing = NA,
|
|
||||||
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE)
|
|
||||||
}
|
|
||||||
\arguments{
|
|
||||||
\item{object}{Object of class "xgb.Booster"}
|
|
||||||
|
|
||||||
\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or
|
|
||||||
\code{xgb.DMatrix}.}
|
|
||||||
|
|
||||||
\item{missing}{Missing is only used when input is dense matrix, pick a float
|
|
||||||
value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.}
|
|
||||||
|
|
||||||
\item{outputmargin}{whether the prediction should be shown in the original
|
|
||||||
value of sum of functions, when outputmargin=TRUE, the prediction is
|
|
||||||
untransformed margin value. In logistic regression, outputmargin=T will
|
|
||||||
output value before logistic transformation.}
|
|
||||||
|
|
||||||
\item{ntreelimit}{limit number of trees used in prediction, this parameter is
|
|
||||||
only valid for gbtree, but not for gblinear. set it to be value bigger
|
|
||||||
than 0. It will use all trees by default.}
|
|
||||||
|
|
||||||
\item{predleaf}{whether predict leaf index instead. If set to TRUE, the output will be a matrix object.}
|
|
||||||
}
|
|
||||||
\description{
|
|
||||||
Predicted values based on xgboost model object.
|
|
||||||
}
|
|
||||||
\details{
|
|
||||||
The option \code{ntreelimit} purpose is to let the user train a model with lots
|
|
||||||
of trees but use only the first trees for prediction to avoid overfitting
|
|
||||||
(without having to train a new model with less trees).
|
|
||||||
|
|
||||||
The option \code{predleaf} purpose is inspired from §3.1 of the paper
|
|
||||||
\code{Practical Lessons from Predicting Clicks on Ads at Facebook}.
|
|
||||||
The idea is to use the model as a generator of new features which capture non linear link
|
|
||||||
from original features.
|
|
||||||
}
|
|
||||||
\examples{
|
|
||||||
data(agaricus.train, package='xgboost')
|
|
||||||
data(agaricus.test, package='xgboost')
|
|
||||||
train <- agaricus.train
|
|
||||||
test <- agaricus.test
|
|
||||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
|
||||||
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
|
|
||||||
pred <- predict(bst, test$data)
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -1,18 +0,0 @@
|
|||||||
% Generated by roxygen2: do not edit by hand
|
|
||||||
% Please edit documentation in R/predict.xgb.Booster.handle.R
|
|
||||||
\docType{methods}
|
|
||||||
\name{predict,xgb.Booster.handle-method}
|
|
||||||
\alias{predict,xgb.Booster.handle-method}
|
|
||||||
\title{Predict method for eXtreme Gradient Boosting model handle}
|
|
||||||
\usage{
|
|
||||||
\S4method{predict}{xgb.Booster.handle}(object, ...)
|
|
||||||
}
|
|
||||||
\arguments{
|
|
||||||
\item{object}{Object of class "xgb.Booster.handle"}
|
|
||||||
|
|
||||||
\item{...}{Parameters pass to \code{predict.xgb.Booster}}
|
|
||||||
}
|
|
||||||
\description{
|
|
||||||
Predicted values based on xgb.Booster.handle object.
|
|
||||||
}
|
|
||||||
|
|
||||||
@ -1,31 +0,0 @@
|
|||||||
% Generated by roxygen2: do not edit by hand
|
|
||||||
% Please edit documentation in R/slice.xgb.DMatrix.R
|
|
||||||
\docType{methods}
|
|
||||||
\name{slice}
|
|
||||||
\alias{slice}
|
|
||||||
\alias{slice,xgb.DMatrix-method}
|
|
||||||
\title{Get a new DMatrix containing the specified rows of
|
|
||||||
original xgb.DMatrix object}
|
|
||||||
\usage{
|
|
||||||
slice(object, ...)
|
|
||||||
|
|
||||||
\S4method{slice}{xgb.DMatrix}(object, idxset, ...)
|
|
||||||
}
|
|
||||||
\arguments{
|
|
||||||
\item{object}{Object of class "xgb.DMatrix"}
|
|
||||||
|
|
||||||
\item{...}{other parameters}
|
|
||||||
|
|
||||||
\item{idxset}{an integer vector of indices of rows needed}
|
|
||||||
}
|
|
||||||
\description{
|
|
||||||
Get a new DMatrix containing the specified rows of
|
|
||||||
original xgb.DMatrix object
|
|
||||||
}
|
|
||||||
\examples{
|
|
||||||
data(agaricus.train, package='xgboost')
|
|
||||||
train <- agaricus.train
|
|
||||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
|
||||||
dsub <- slice(dtrain, 1:3)
|
|
||||||
}
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user