consolidated DMatrix&Booster stuff into xgb.DMatrix.R & xgb.Booster.R

This commit is contained in:
Vadim Khotilovich 2016-03-27 19:17:13 -05:00
parent 1d504d6c6c
commit d27bfb61b0
10 changed files with 0 additions and 378 deletions

View File

@@ -1,55 +0,0 @@
setClass('xgb.DMatrix')
#' Get information of an xgb.DMatrix object
#'
#' Get information of an xgb.DMatrix object
#'
#' The information can be one of the following:
#'
#' \itemize{
#' \item \code{label}: the label that xgboost learns from;
#' \item \code{weight}: observation weights used to rescale each data point's contribution;
#' \item \code{base_margin}: the base prediction that xgboost will boost from;
#' \item \code{nrow}: number of rows of the \code{xgb.DMatrix}.
#' }
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#' labels <- getinfo(dtrain, 'label')
#' setinfo(dtrain, 'label', 1-labels)
#' labels2 <- getinfo(dtrain, 'label')
#' stopifnot(all(labels2 == 1-labels))
#' @rdname getinfo
#' @export
getinfo <- function(object, ...){
UseMethod("getinfo")
}
#' @param object Object of class \code{xgb.DMatrix}
#' @param name the name of the field to get
#' @param ... other parameters
#' @rdname getinfo
#' @method getinfo xgb.DMatrix
setMethod("getinfo", signature = "xgb.DMatrix",
definition = function(object, name) {
if (typeof(name) != "character") {
stop("xgb.getinfo: name must be character")
}
if (class(object) != "xgb.DMatrix") {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix")
}
if (name != "label" && name != "weight" &&
name != "base_margin" && name != "nrow") {
stop(paste("xgb.getinfo: unknown info name", name))
}
if (name != "nrow"){
ret <- .Call("XGDMatrixGetInfo_R", object, name, PACKAGE = "xgboost")
} else {
ret <- xgb.numrow(object)
}
return(ret)
})
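
For context (not part of this diff), here is a minimal usage sketch of the getinfo fields listed above, assuming the agaricus data that ships with xgboost, as in the roxygen example:

# illustrative sketch, not from the commit
library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
getinfo(dtrain, 'label')   # numeric vector of training labels
getinfo(dtrain, 'nrow')    # number of rows, handled via xgb.numrow()
# 'weight' and 'base_margin' are expected to come back empty until they
# have been populated with setinfo()
getinfo(dtrain, 'weight')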

View File

@@ -1,19 +0,0 @@
setGeneric("nrow")
#' @title Number of xgb.DMatrix rows
#' @description \code{nrow} returns the number of rows present in the \code{xgb.DMatrix}.
#' @param x Object of class \code{xgb.DMatrix}
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#' stopifnot(nrow(dtrain) == nrow(train$data))
#'
#' @export
setMethod("nrow",
signature = "xgb.DMatrix",
definition = function(x) {
xgb.numrow(x)
}
)

View File

@@ -1,80 +0,0 @@
setClass("xgb.Booster.handle")
setClass("xgb.Booster",
slots = c(handle = "xgb.Booster.handle",
raw = "raw"))
#' Predict method for eXtreme Gradient Boosting model
#'
#' Predicted values based on xgboost model object.
#'
#' @param object Object of class \code{xgb.Booster}
#' @param newdata takes a \code{matrix}, \code{dgCMatrix}, local data file or
#'        \code{xgb.DMatrix}.
#' @param missing only used when the input is a dense matrix: the value that
#'        represents missing entries. Some datasets use 0 or another extreme
#'        value to represent missing values.
#' @param outputmargin whether the prediction should be returned as the
#'        untransformed sum of the boosted functions. When \code{outputmargin=TRUE},
#'        the prediction is the raw margin value; for logistic regression this is
#'        the value before the logistic transformation.
#' @param ntreelimit limit the number of trees used in prediction; only valid
#'        for the gbtree booster, not for gblinear. Must be a positive value;
#'        by default all trees are used.
#' @param predleaf whether to predict leaf indices instead of values. If set to
#'        TRUE, the output will be a matrix object.
#'
#' @details
#' The purpose of the \code{ntreelimit} option is to let the user train a model
#' with many trees but use only the first ones for prediction, to avoid
#' overfitting (without having to train a new model with fewer trees).
#'
#' The \code{predleaf} option is inspired by Section 3.1 of the paper
#' \emph{Practical Lessons from Predicting Clicks on Ads at Facebook}.
#' The idea is to use the model as a generator of new features that capture
#' non-linear relationships among the original features.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
#' pred <- predict(bst, test$data)
#' @export
setMethod("predict", signature = "xgb.Booster",
definition = function(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {
if (class(object) != "xgb.Booster"){
stop("predict: model in prediction must be of class xgb.Booster")
} else {
object <- xgb.Booster.check(object, saveraw = FALSE)
}
if (class(newdata) != "xgb.DMatrix") {
newdata <- xgb.DMatrix(newdata, missing = missing)
}
if (is.null(ntreelimit)) {
ntreelimit <- 0
} else {
if (ntreelimit < 1){
stop("predict: ntreelimit must be equal to or greater than 1")
}
}
# the prediction option is a bit flag passed to the C API:
# +1 requests the untransformed margin, +2 requests leaf index prediction
option <- 0
if (outputmargin) {
option <- option + 1
}
if (predleaf) {
option <- option + 2
}
ret <- .Call("XGBoosterPredict_R", object$handle, newdata, as.integer(option),
as.integer(ntreelimit), PACKAGE = "xgboost")
if (predleaf){
# reshape the flat result into an observations x trees matrix of leaf
# indices (a single column when the model has only one tree)
len <- getinfo(newdata, "nrow")
if (length(ret) == len){
ret <- matrix(ret, ncol = 1)
} else {
ret <- matrix(ret, ncol = len)
ret <- t(ret)
}
}
return(ret)
})
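
As an aside (not part of this diff), a short sketch of how the ntreelimit and predleaf options described above could be exercised; it reuses the agaricus data and the old-style max.depth/nround parameter names from the roxygen example:

# illustrative sketch, not from the commit
library(xgboost)
data(agaricus.train, package = 'xgboost')
data(agaricus.test, package = 'xgboost')
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max.depth = 2, eta = 1, nthread = 2, nround = 4,
               objective = "binary:logistic")
# use only the first 2 of the 4 boosting rounds for prediction
pred2 <- predict(bst, agaricus.test$data, ntreelimit = 2)
# leaf indices: the reshaping above should yield one row per test example
# and one column per tree
leaves <- predict(bst, agaricus.test$data, predleaf = TRUE)
dim(leaves)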

View File

@@ -1,18 +0,0 @@
#' Predict method for eXtreme Gradient Boosting model handle
#'
#' Predicted values based on xgb.Booster.handle object.
#'
#' @param object Object of class \code{xgb.Booster.handle}
#' @param ... Parameters passed to \code{predict.xgb.Booster}
#'
setMethod("predict", signature = "xgb.Booster.handle",
definition = function(object, ...) {
if (class(object) != "xgb.Booster.handle"){
stop("predict: model in prediction must be of class xgb.Booster.handle")
}
bst <- xgb.handleToBooster(object)
ret <- predict(bst, ...)
return(ret)
})

View File

@@ -1,37 +0,0 @@
#' Set information of an xgb.DMatrix object
#'
#' Set information of an xgb.DMatrix object
#'
#' It can be one of the following:
#'
#' \itemize{
#' \item \code{label}: the label that xgboost learns from;
#' \item \code{weight}: observation weights used to rescale each data point's contribution;
#' \item \code{base_margin}: the base prediction that xgboost will boost from;
#' \item \code{group}: the size of each query group, for ranking tasks.
#' }
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#' labels <- getinfo(dtrain, 'label')
#' setinfo(dtrain, 'label', 1-labels)
#' labels2 <- getinfo(dtrain, 'label')
#' stopifnot(all(labels2 == 1-labels))
#' @rdname setinfo
#' @export
setinfo <- function(object, ...){
UseMethod("setinfo")
}
#' @param object Object of class "xgb.DMatrix"
#' @param name the name of the field to set
#' @param info the information to store in that field
#' @param ... other parameters
#' @rdname setinfo
#' @method setinfo xgb.DMatrix
setMethod("setinfo", signature = "xgb.DMatrix",
definition = function(object, name, info) {
xgb.setinfo(object, name, info)
})
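
As an aside (not part of this diff), a minimal sketch of setting the fields listed above; the group sizes vector is hypothetical and only relevant for ranking objectives:

# illustrative sketch, not from the commit
library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
# give every observation the same weight
setinfo(dtrain, 'weight', rep(1, nrow(dtrain)))
# for ranking tasks, 'group' holds the size of each query group;
# group_sizes is a hypothetical integer vector summing to nrow(dtrain)
# setinfo(dtrain, 'group', group_sizes)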

View File

@@ -1,44 +0,0 @@
setClass('xgb.DMatrix')
#' Get a new DMatrix containing the specified rows of
#' the original xgb.DMatrix object
#'
#' Get a new DMatrix containing the specified rows of
#' the original xgb.DMatrix object
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#' dsub <- slice(dtrain, 1:3)
#' @rdname slice
#' @export
slice <- function(object, ...){
UseMethod("slice")
}
#' @param object Object of class "xgb.DMatrix"
#' @param idxset an integer vector of the row indices to keep
#' @param ... other parameters
#' @rdname slice
#' @method slice xgb.DMatrix
setMethod("slice", signature = "xgb.DMatrix",
definition = function(object, idxset, ...) {
if (class(object) != "xgb.DMatrix") {
stop("slice: first argument dtrain must be xgb.DMatrix")
}
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset,
PACKAGE = "xgboost")
# copy over any per-row attributes of the original DMatrix (those whose
# length equals the number of rows), subsetting them to the sliced rows
attr_list <- attributes(object)
nr <- xgb.numrow(object)
len <- sapply(attr_list, length)
ind <- which(len == nr)
if (length(ind) > 0) {
nms <- names(attr_list)[ind]
for (i in 1:length(ind)) {
attr(ret, nms[i]) <- attr(object, nms[i])[idxset]
}
}
return(structure(ret, class = "xgb.DMatrix"))
})
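
As an aside (not part of this diff), a small sketch of slicing; it assumes, as the attribute-copying logic above suggests, that per-row information such as the label follows the selected rows:

# illustrative sketch, not from the commit
library(xgboost)
data(agaricus.train, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dsub <- slice(dtrain, 1:3)
nrow(dsub)                # 3
getinfo(dsub, 'label')    # expected to match agaricus.train$label[1:3]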

View File

@@ -1,23 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nrow.xgb.DMatrix.R
\docType{methods}
\name{nrow,xgb.DMatrix-method}
\alias{nrow,xgb.DMatrix-method}
\title{Number of xgb.DMatrix rows}
\usage{
\S4method{nrow}{xgb.DMatrix}(x)
}
\arguments{
\item{x}{Object of class \code{xgb.DMatrix}}
}
\description{
\code{nrow} returns the number of rows present in the \code{xgb.DMatrix}.
}
\examples{
data(agaricus.train, package='xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label)
stopifnot(nrow(dtrain) == nrow(train$data))
}

View File

@@ -1,53 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.xgb.Booster.R
\docType{methods}
\name{predict,xgb.Booster-method}
\alias{predict,xgb.Booster-method}
\title{Predict method for eXtreme Gradient Boosting model}
\usage{
\S4method{predict}{xgb.Booster}(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE)
}
\arguments{
\item{object}{Object of class \code{xgb.Booster}}
\item{newdata}{takes a \code{matrix}, \code{dgCMatrix}, local data file or
\code{xgb.DMatrix}.}
\item{missing}{only used when the input is a dense matrix: the value that
represents missing entries. Some datasets use 0 or another extreme value to
represent missing values.}
\item{outputmargin}{whether the prediction should be returned as the
untransformed sum of the boosted functions. When \code{outputmargin=TRUE},
the prediction is the raw margin value; for logistic regression this is the
value before the logistic transformation.}
\item{ntreelimit}{limit the number of trees used in prediction; only valid
for the gbtree booster, not for gblinear. Must be a positive value; by
default all trees are used.}
\item{predleaf}{whether to predict leaf indices instead of values. If set to
TRUE, the output will be a matrix object.}
}
\description{
Predicted values based on xgboost model object.
}
\details{
The purpose of the \code{ntreelimit} option is to let the user train a model
with many trees but use only the first ones for prediction, to avoid
overfitting (without having to train a new model with fewer trees).

The \code{predleaf} option is inspired by Section 3.1 of the paper
\emph{Practical Lessons from Predicting Clicks on Ads at Facebook}.
The idea is to use the model as a generator of new features that capture
non-linear relationships among the original features.
}
\examples{
data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
pred <- predict(bst, test$data)
}

View File

@@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.xgb.Booster.handle.R
\docType{methods}
\name{predict,xgb.Booster.handle-method}
\alias{predict,xgb.Booster.handle-method}
\title{Predict method for eXtreme Gradient Boosting model handle}
\usage{
\S4method{predict}{xgb.Booster.handle}(object, ...)
}
\arguments{
\item{object}{Object of class \code{xgb.Booster.handle}}
\item{...}{Parameters passed to \code{predict.xgb.Booster}}
}
\description{
Predicted values based on xgb.Booster.handle object.
}

View File

@@ -1,31 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/slice.xgb.DMatrix.R
\docType{methods}
\name{slice}
\alias{slice}
\alias{slice,xgb.DMatrix-method}
\title{Get a new DMatrix containing the specified rows of
the original xgb.DMatrix object}
\usage{
slice(object, ...)
\S4method{slice}{xgb.DMatrix}(object, idxset, ...)
}
\arguments{
\item{object}{Object of class "xgb.DMatrix"}
\item{...}{other parameters}
\item{idxset}{an integer vector of the row indices to keep}
}
\description{
Get a new DMatrix containing the specified rows of
the original xgb.DMatrix object
}
\examples{
data(agaricus.train, package='xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label)
dsub <- slice(dtrain, 1:3)
}