Merge pull request #1050 from khotilov/S4toS3

[R-package] Convert to S3; some new DMatrix methods
This commit is contained in:
Tong He 2016-04-03 11:52:08 -07:00
commit 4af55518f2
23 changed files with 635 additions and 387 deletions

View File

@ -1,6 +1,16 @@
# Generated by roxygen2: do not edit by hand
S3method("[",xgb.DMatrix)
S3method("dimnames<-",xgb.DMatrix)
S3method(dim,xgb.DMatrix)
S3method(dimnames,xgb.DMatrix)
S3method(getinfo,xgb.DMatrix)
S3method(predict,xgb.Booster)
S3method(predict,xgb.Booster.handle)
S3method(setinfo,xgb.DMatrix)
S3method(slice,xgb.DMatrix)
export(getinfo)
export(print.xgb.DMatrix)
export(setinfo)
export(slice)
export(xgb.DMatrix)
@ -19,8 +29,6 @@ export(xgb.save)
export(xgb.save.raw)
export(xgb.train)
export(xgboost)
exportMethods(nrow)
exportMethods(predict)
import(methods)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgeMatrix)

View File

@ -1,55 +0,0 @@
setClass('xgb.DMatrix')
#' Get information of an xgb.DMatrix object
#'
#' Get information of an xgb.DMatrix object
#'
#' The information can be one of the following:
#'
#' \itemize{
#' \item \code{label}: label Xgboost learn from ;
#' \item \code{weight}: to do a weight rescale ;
#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ;
#' \item \code{nrow}: number of rows of the \code{xgb.DMatrix}.
#' }
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#' labels <- getinfo(dtrain, 'label')
#' setinfo(dtrain, 'label', 1-labels)
#' labels2 <- getinfo(dtrain, 'label')
#' stopifnot(all(labels2 == 1-labels))
#' @rdname getinfo
#' @export
getinfo <- function(object, ...){
UseMethod("getinfo")
}
#' @param object Object of class \code{xgb.DMatrix}
#' @param name the name of the field to get
#' @param ... other parameters
#' @rdname getinfo
#' @method getinfo xgb.DMatrix
setMethod("getinfo", signature = "xgb.DMatrix",
definition = function(object, name) {
if (typeof(name) != "character") {
stop("xgb.getinfo: name must be character")
}
if (class(object) != "xgb.DMatrix") {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix")
}
if (name != "label" && name != "weight" &&
name != "base_margin" && name != "nrow") {
stop(paste("xgb.getinfo: unknown info name", name))
}
if (name != "nrow"){
ret <- .Call("XGDMatrixGetInfo_R", object, name, PACKAGE = "xgboost")
} else {
ret <- xgb.numrow(object)
}
return(ret)
})

View File

@ -1,19 +0,0 @@
setGeneric("nrow")
#' @title Number of xgb.DMatrix rows
#' @description \code{nrow} return the number of rows present in the \code{xgb.DMatrix}.
#' @param x Object of class \code{xgb.DMatrix}
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#' stopifnot(nrow(dtrain) == nrow(train$data))
#'
#' @export
setMethod("nrow",
signature = "xgb.DMatrix",
definition = function(x) {
xgb.numrow(x)
}
)

View File

@ -1,18 +0,0 @@
#' Predict method for eXtreme Gradient Boosting model handle
#'
#' Predicted values based on xgb.Booster.handle object.
#'
#' @param object Object of class "xgb.Boost.handle"
#' @param ... Parameters pass to \code{predict.xgb.Booster}
#'
setMethod("predict", signature = "xgb.Booster.handle",
definition = function(object, ...) {
if (class(object) != "xgb.Booster.handle"){
stop("predict: model in prediction must be of class xgb.Booster.handle")
}
bst <- xgb.handleToBooster(object)
ret <- predict(bst, ...)
return(ret)
})

View File

@ -1,37 +0,0 @@
#' Set information of an xgb.DMatrix object
#'
#' Set information of an xgb.DMatrix object
#'
#' It can be one of the following:
#'
#' \itemize{
#' \item \code{label}: label Xgboost learn from ;
#' \item \code{weight}: to do a weight rescale ;
#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ;
#' \item \code{group}.
#' }
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#' labels <- getinfo(dtrain, 'label')
#' setinfo(dtrain, 'label', 1-labels)
#' labels2 <- getinfo(dtrain, 'label')
#' stopifnot(all(labels2 == 1-labels))
#' @rdname setinfo
#' @export
setinfo <- function(object, ...){
UseMethod("setinfo")
}
#' @param object Object of class "xgb.DMatrix"
#' @param name the name of the field to get
#' @param info the specific field of information to set
#' @param ... other parameters
#' @rdname setinfo
#' @method setinfo xgb.DMatrix
setMethod("setinfo", signature = "xgb.DMatrix",
definition = function(object, name, info) {
xgb.setinfo(object, name, info)
})

View File

@ -1,44 +0,0 @@
setClass('xgb.DMatrix')
#' Get a new DMatrix containing the specified rows of
#' orginal xgb.DMatrix object
#'
#' Get a new DMatrix containing the specified rows of
#' orginal xgb.DMatrix object
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#' dsub <- slice(dtrain, 1:3)
#' @rdname slice
#' @export
slice <- function(object, ...){
UseMethod("slice")
}
#' @param object Object of class "xgb.DMatrix"
#' @param idxset a integer vector of indices of rows needed
#' @param ... other parameters
#' @rdname slice
#' @method slice xgb.DMatrix
setMethod("slice", signature = "xgb.DMatrix",
definition = function(object, idxset, ...) {
if (class(object) != "xgb.DMatrix") {
stop("slice: first argument dtrain must be xgb.DMatrix")
}
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset,
PACKAGE = "xgboost")
attr_list <- attributes(object)
nr <- xgb.numrow(object)
len <- sapply(attr_list,length)
ind <- which(len == nr)
if (length(ind) > 0) {
nms <- names(attr_list)[ind]
for (i in 1:length(ind)) {
attr(ret,nms[i]) <- attr(object,nms[i])[idxset]
}
}
return(structure(ret, class = "xgb.DMatrix"))
})

View File

@ -1,4 +1,4 @@
#' @importClassesFrom Matrix dgCMatrix dgeMatrix
#' @importClassesFrom Matrix dgCMatrix dgeMatrix
#' @import methods
# depends on matrix
@ -9,131 +9,10 @@
library.dynam.unload("xgboost", libpath)
}
# set information into dmatrix, this mutate dmatrix
xgb.setinfo <- function(dmat, name, info) {
if (class(dmat) != "xgb.DMatrix") {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix")
}
if (name == "label") {
if (length(info) != xgb.numrow(dmat))
stop("The length of labels must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
PACKAGE = "xgboost")
return(TRUE)
}
if (name == "weight") {
if (length(info) != xgb.numrow(dmat))
stop("The length of weights must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
PACKAGE = "xgboost")
return(TRUE)
}
if (name == "base_margin") {
# if (length(info)!=xgb.numrow(dmat))
# stop("The length of base margin must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
PACKAGE = "xgboost")
return(TRUE)
}
if (name == "group") {
if (sum(info) != xgb.numrow(dmat))
stop("The sum of groups must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info),
PACKAGE = "xgboost")
return(TRUE)
}
stop(paste("xgb.setinfo: unknown info name", name))
return(FALSE)
}
# construct a Booster from cachelist
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(cachelist) != "list") {
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
}
for (dm in cachelist) {
if (class(dm) != "xgb.DMatrix") {
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
}
}
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
if (length(params) != 0) {
for (i in 1:length(params)) {
p <- params[i]
.Call("XGBoosterSetParam_R", handle, gsub("\\.", "_", names(p)), as.character(p),
PACKAGE = "xgboost")
}
}
if (!is.null(modelfile)) {
if (typeof(modelfile) == "character") {
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
} else if (typeof(modelfile) == "raw") {
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
} else {
stop("xgb.Booster: modelfile must be character or raw vector")
}
}
return(structure(handle, class = "xgb.Booster.handle"))
}
# convert xgb.Booster.handle to xgb.Booster
xgb.handleToBooster <- function(handle, raw = NULL)
{
bst <- list(handle = handle, raw = raw)
class(bst) <- "xgb.Booster"
return(bst)
}
# Check whether an xgb.Booster object is complete
xgb.Booster.check <- function(bst, saveraw = TRUE)
{
isnull <- is.null(bst$handle)
if (!isnull) {
isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost")
}
if (isnull) {
bst$handle <- xgb.Booster(modelfile = bst$raw)
} else {
if (is.null(bst$raw) && saveraw)
bst$raw <- xgb.save.raw(bst$handle)
}
return(bst)
}
## ----the following are low level iteratively function, not needed if
## ----the following are low level iterative functions, not needed if
## you do not want to use them ---------------------------------------
# get dmatrix from data, label
xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
inClass <- class(data)
if (inClass == "dgCMatrix" || inClass == "matrix") {
if (is.null(label)) {
stop("xgboost: need label when data is a matrix")
}
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
if (!is.null(weight)){
xgb.setinfo(dtrain, "weight", weight)
}
} else {
if (!is.null(label)) {
warning("xgboost: label will be ignored.")
}
if (inClass == "character") {
dtrain <- xgb.DMatrix(data)
} else if (inClass == "xgb.DMatrix") {
dtrain <- data
} else if (inClass == "data.frame") {
stop("xgboost only support numerical matrix input,
use 'data.matrix' to transform the data.")
} else {
stop("xgboost: Invalid input of data")
}
}
return (dtrain)
}
xgb.numrow <- function(dmat) {
nrow <- .Call("XGDMatrixNumRow_R", dmat, PACKAGE="xgboost")
return(nrow)
}
# iteratively update booster with customized statistics
xgb.iter.boost <- function(booster, dtrain, gpair) {
if (class(booster) != "xgb.Booster.handle") {
@ -227,7 +106,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
"\tConsider providing pre-computed CV-folds through the folds parameter.")
}
y <- getinfo(dall, 'label')
randidx <- sample(1 : xgb.numrow(dall))
randidx <- sample(1 : nrow(dall))
if (stratified & length(y) == length(randidx)) {
y <- y[randidx]
#

View File

@ -1,13 +1,65 @@
setClass("xgb.Booster.handle")
setClass("xgb.Booster",
slots = c(handle = "xgb.Booster.handle",
raw = "raw"))
# Construct a Booster from cachelist
# internal utility function
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(cachelist) != "list") {
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
}
for (dm in cachelist) {
if (class(dm) != "xgb.DMatrix") {
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
}
}
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
if (length(params) != 0) {
for (i in 1:length(params)) {
p <- params[i]
.Call("XGBoosterSetParam_R", handle, gsub("\\.", "_", names(p)), as.character(p),
PACKAGE = "xgboost")
}
}
if (!is.null(modelfile)) {
if (typeof(modelfile) == "character") {
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
} else if (typeof(modelfile) == "raw") {
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
} else {
stop("xgb.Booster: modelfile must be character or raw vector")
}
}
return(structure(handle, class = "xgb.Booster.handle"))
}
# Convert xgb.Booster.handle to xgb.Booster
# internal utility function
xgb.handleToBooster <- function(handle, raw = NULL)
{
bst <- list(handle = handle, raw = raw)
class(bst) <- "xgb.Booster"
return(bst)
}
# Check whether an xgb.Booster object is complete
# internal utility function
xgb.Booster.check <- function(bst, saveraw = TRUE)
{
isnull <- is.null(bst$handle)
if (!isnull) {
isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost")
}
if (isnull) {
bst$handle <- xgb.Booster(modelfile = bst$raw)
} else {
if (is.null(bst$raw) && saveraw)
bst$raw <- xgb.save.raw(bst$handle)
}
return(bst)
}
#' Predict method for eXtreme Gradient Boosting model
#'
#' Predicted values based on xgboost model object.
#' Predicted values based on either xgboost model or model handle object.
#'
#' @param object Object of class "xgb.Boost"
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}
#' @param newdata takes \code{matrix}, \code{dgCMatrix}, local data file or
#' \code{xgb.DMatrix}.
#' @param missing Missing is only used when input is dense matrix, pick a float
@ -20,6 +72,7 @@ setClass("xgb.Booster",
#' only valid for gbtree, but not for gblinear. set it to be value bigger
#' than 0. It will use all trees by default.
#' @param predleaf whether predict leaf index instead. If set to TRUE, the output will be a matrix object.
#' @param ... Parameters pass to \code{predict.xgb.Booster}
#'
#' @details
#' The option \code{ntreelimit} purpose is to let the user train a model with lots
@ -36,13 +89,14 @@ setClass("xgb.Booster",
#' data(agaricus.test, package='xgboost')
#' train <- agaricus.train
#' test <- agaricus.test
#'
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
#' pred <- predict(bst, test$data)
#' @rdname predict.xgb.Booster
#' @export
setMethod("predict", signature = "xgb.Booster",
definition = function(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {
predict.xgb.Booster <- function(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {
if (class(object) != "xgb.Booster"){
stop("predict: model in prediction must be of class xgb.Booster")
} else {
@ -77,4 +131,14 @@ setMethod("predict", signature = "xgb.Booster",
}
}
return(ret)
})
}
#' @rdname predict.xgb.Booster
#' @export
predict.xgb.Booster.handle <- function(object, ...) {
bst <- xgb.handleToBooster(object)
ret <- predict(bst, ...)
return(ret)
}

View File

@ -1,9 +1,9 @@
#' Contruct xgb.DMatrix object
#'
#' Contruct xgb.DMatrix object from dense matrix, sparse matrix or local file.
#' Contruct xgb.DMatrix object from dense matrix, sparse matrix
#' or local file (that was created previously by saving an \code{xgb.DMatrix}).
#'
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character
#' indicating the data file.
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the xgb.DMatrix object
#' @param missing Missing is only used when input is dense matrix, pick a float
#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
@ -18,27 +18,348 @@
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' @export
xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
cnames <- NULL
if (typeof(data) == "character") {
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
PACKAGE = "xgboost")
} else if (is.matrix(data)) {
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
PACKAGE = "xgboost")
cnames <- colnames(data)
} else if (class(data) == "dgCMatrix") {
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x,
PACKAGE = "xgboost")
cnames <- colnames(data)
} else {
stop(paste("xgb.DMatrix: does not support to construct from ",
typeof(data)))
}
dmat <- structure(handle, class = "xgb.DMatrix")
dmat <- handle
attributes(dmat) <- list(.Dimnames = list(NULL, cnames), class = "xgb.DMatrix")
#dmat <- list(handle = handle, colnames = cnames)
#attr(dmat, 'class') <- "xgb.DMatrix"
info <- append(info, list(...))
if (length(info) == 0)
return(dmat)
for (i in 1:length(info)) {
p <- info[i]
xgb.setinfo(dmat, names(p), p[[1]])
setinfo(dmat, names(p), p[[1]])
}
return(dmat)
}
# get dmatrix from data, label
# internal helper method
xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
inClass <- class(data)
if (inClass == "dgCMatrix" || inClass == "matrix") {
if (is.null(label)) {
stop("xgboost: need label when data is a matrix")
}
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
if (!is.null(weight)){
setinfo(dtrain, "weight", weight)
}
} else {
if (!is.null(label)) {
warning("xgboost: label will be ignored.")
}
if (inClass == "character") {
dtrain <- xgb.DMatrix(data)
} else if (inClass == "xgb.DMatrix") {
dtrain <- data
} else if (inClass == "data.frame") {
stop("xgboost only support numerical matrix input,
use 'data.matrix' to transform the data.")
} else {
stop("xgboost: Invalid input of data")
}
}
return (dtrain)
}
#' Dimensions of xgb.DMatrix
#'
#' Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}.
#' @param x Object of class \code{xgb.DMatrix}
#'
#' @details
#' Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
#' be directly used with an \code{xgb.DMatrix} object.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#'
#' stopifnot(nrow(dtrain) == nrow(train$data))
#' stopifnot(ncol(dtrain) == ncol(train$data))
#' stopifnot(all(dim(dtrain) == dim(train$data)))
#'
#' @export
dim.xgb.DMatrix <- function(x) {
c(.Call("XGDMatrixNumRow_R", x, PACKAGE="xgboost"),
.Call("XGDMatrixNumCol_R", x, PACKAGE="xgboost"))
}
#' Handling of column names of \code{xgb.DMatrix}
#'
#' Only column names are supported for \code{xgb.DMatrix}, thus setting of
#' row names would have no effect and returnten row names would be NULL.
#'
#' @param x object of class \code{xgb.DMatrix}
#' @param value a list of two elements: the first one is ignored
#' and the second one is column names
#'
#' @details
#' Generic \code{dimnames} methods are used by \code{colnames}.
#' Since row names are irrelevant, it is recommended to use \code{colnames} directly.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#' dimnames(dtrain)
#' colnames(dtrain)
#' colnames(dtrain) <- make.names(1:ncol(train$data))
#' print(dtrain, verbose=TRUE)
#'
#' @rdname dimnames.xgb.DMatrix
#' @export
dimnames.xgb.DMatrix <- function(x) {
attr(x, '.Dimnames')
}
#' @rdname dimnames.xgb.DMatrix
#' @export
`dimnames<-.xgb.DMatrix` <- function(x, value) {
if (!is.list(value) || length(value) != 2L)
stop("invalid 'dimnames' given: must be a list of two elements")
if (!is.null(value[[1L]]))
stop("xgb.DMatrix does not have rownames")
if (is.null(value[[2]])) {
attr(x, '.Dimnames') <- NULL
return(x)
}
if (ncol(x) != length(value[[2]]))
stop("can't assign ", length(value[[2]]), " colnames to a ",
ncol(x), " column xgb.DMatrix")
attr(x, '.Dimnames') <- value
x
}
#' Get information of an xgb.DMatrix object
#'
#' Get information of an xgb.DMatrix object
#' @param object Object of class \code{xgb.DMatrix}
#' @param name the name of the information field to get (see details)
#' @param ... other parameters
#'
#' @details
#' The \code{name} field can be one of the following:
#'
#' \itemize{
#' \item \code{label}: label Xgboost learn from ;
#' \item \code{weight}: to do a weight rescale ;
#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ;
#' \item \code{nrow}: number of rows of the \code{xgb.DMatrix}.
#' }
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#'
#' labels <- getinfo(dtrain, 'label')
#' setinfo(dtrain, 'label', 1-labels)
#'
#' labels2 <- getinfo(dtrain, 'label')
#' stopifnot(all(labels2 == 1-labels))
#' @rdname getinfo
#' @export
getinfo <- function(object, ...) UseMethod("getinfo")
#' @rdname getinfo
#' @export
getinfo.xgb.DMatrix <- function(object, name) {
if (typeof(name) != "character") {
stop("getinfo: name must be character")
}
if (name != "label" && name != "weight" &&
name != "base_margin" && name != "nrow") {
stop(paste("getinfo: unknown info name", name))
}
if (name != "nrow"){
ret <- .Call("XGDMatrixGetInfo_R", object, name, PACKAGE = "xgboost")
} else {
ret <- nrow(object)
}
return(ret)
}
#' Set information of an xgb.DMatrix object
#'
#' Set information of an xgb.DMatrix object
#'
#' @param object Object of class "xgb.DMatrix"
#' @param name the name of the field to get
#' @param info the specific field of information to set
#' @param ... other parameters
#'
#' @details
#' The \code{name} field can be one of the following:
#'
#' \itemize{
#' \item \code{label}: label Xgboost learn from ;
#' \item \code{weight}: to do a weight rescale ;
#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ;
#' \item \code{group}.
#' }
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#'
#' labels <- getinfo(dtrain, 'label')
#' setinfo(dtrain, 'label', 1-labels)
#' labels2 <- getinfo(dtrain, 'label')
#' stopifnot(all.equal(labels2, 1-labels))
#' @rdname setinfo
#' @export
setinfo <- function(object, ...) UseMethod("setinfo")
#' @rdname setinfo
#' @export
setinfo.xgb.DMatrix <- function(object, name, info) {
if (name == "label") {
if (length(info) != nrow(object))
stop("The length of labels must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", object, name, as.numeric(info),
PACKAGE = "xgboost")
return(TRUE)
}
if (name == "weight") {
if (length(info) != nrow(object))
stop("The length of weights must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", object, name, as.numeric(info),
PACKAGE = "xgboost")
return(TRUE)
}
if (name == "base_margin") {
# if (length(info)!=nrow(object))
# stop("The length of base margin must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", object, name, as.numeric(info),
PACKAGE = "xgboost")
return(TRUE)
}
if (name == "group") {
if (sum(info) != nrow(object))
stop("The sum of groups must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", object, name, as.integer(info),
PACKAGE = "xgboost")
return(TRUE)
}
stop(paste("setinfo: unknown info name", name))
return(FALSE)
}
#' Get a new DMatrix containing the specified rows of
#' orginal xgb.DMatrix object
#'
#' Get a new DMatrix containing the specified rows of
#' orginal xgb.DMatrix object
#'
#' @param object Object of class "xgb.DMatrix"
#' @param idxset a integer vector of indices of rows needed
#' @param colset currently not used (columns subsetting is not available)
#' @param ... other parameters (currently not used)
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#'
#' dsub <- slice(dtrain, 1:42)
#' labels1 <- getinfo(dsub, 'label')
#' dsub <- dtrain[1:42, ]
#' labels2 <- getinfo(dsub, 'label')
#' all.equal(labels1, labels2)
#'
#' @rdname slice.xgb.DMatrix
#' @export
slice <- function(object, ...) UseMethod("slice")
#' @rdname slice.xgb.DMatrix
#' @export
slice.xgb.DMatrix <- function(object, idxset, ...) {
if (class(object) != "xgb.DMatrix") {
stop("slice: first argument dtrain must be xgb.DMatrix")
}
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, PACKAGE = "xgboost")
attr_list <- attributes(object)
nr <- nrow(object)
len <- sapply(attr_list, length)
ind <- which(len == nr)
if (length(ind) > 0) {
nms <- names(attr_list)[ind]
for (i in 1:length(ind)) {
attr(ret, nms[i]) <- attr(object, nms[i])[idxset]
}
}
return(structure(ret, class = "xgb.DMatrix"))
}
#' @rdname slice.xgb.DMatrix
#' @export
`[.xgb.DMatrix` <- function(object, idxset, colset=NULL) {
slice(object, idxset)
}
#' Print xgb.DMatrix
#'
#' Print information about xgb.DMatrix.
#' Currently it displays dimensions and presence of info-fields and colnames.
#'
#' @param x an xgb.DMatrix object
#' @param verbose whether to print colnames (when present)
#' @param ... not currently used
#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
#'
#' dtrain
#' print(dtrain, verbose=TRUE)
#' @export
print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ')
infos <- c()
if(length(getinfo(x, 'label')) > 0) infos <- 'label'
if(length(getinfo(x, 'weight')) > 0) infos <- c(infos, 'weight')
if(length(getinfo(x, 'base_margin')) > 0) infos <- c(infos, 'base_margin')
if (length(infos) == 0) infos <- 'NA'
cat(infos)
cnames <- colnames(x)
cat(' colnames:')
if (verbose & !is.null(cnames)) {
cat("\n'")
cat(cnames, sep="','")
cat("'")
} else {
if (is.null(cnames)) cat(' no')
else cat(' yes')
}
cat("\n")
invisible(x)
}

View File

@ -169,11 +169,11 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
num_class <- params[['num_class']]
if (is.null(num_class))
stop('must set num_class to use softmax')
predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
predictValues <- matrix(0, nrow(dtrain), num_class)
mat_pred <- TRUE
}
else
predictValues <- rep(0,xgb.numrow(dtrain))
predictValues <- rep(0, nrow(dtrain))
history <- c()
print.every.n <- max(as.integer(print.every.n), 1L)
for (i in 1:nrounds) {

View File

@ -0,0 +1,29 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.DMatrix.R
\name{dim.xgb.DMatrix}
\alias{dim.xgb.DMatrix}
\title{Dimensions of xgb.DMatrix}
\usage{
\method{dim}{xgb.DMatrix}(x)
}
\arguments{
\item{x}{Object of class \code{xgb.DMatrix}}
}
\description{
Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}.
}
\details{
Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
be directly used with an \code{xgb.DMatrix} object.
}
\examples{
data(agaricus.train, package='xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label)
stopifnot(nrow(dtrain) == nrow(train$data))
stopifnot(ncol(dtrain) == ncol(train$data))
stopifnot(all(dim(dtrain) == dim(train$data)))
}

View File

@ -0,0 +1,36 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.DMatrix.R
\name{dimnames.xgb.DMatrix}
\alias{dimnames.xgb.DMatrix}
\alias{dimnames<-.xgb.DMatrix}
\title{Handling of column names of \code{xgb.DMatrix}}
\usage{
\method{dimnames}{xgb.DMatrix}(x)
\method{dimnames}{xgb.DMatrix}(x) <- value
}
\arguments{
\item{x}{object of class \code{xgb.DMatrix}}
\item{value}{a list of two elements: the first one is ignored
and the second one is column names}
}
\description{
Only column names are supported for \code{xgb.DMatrix}, thus setting of
row names would have no effect and returnten row names would be NULL.
}
\details{
Generic \code{dimnames} methods are used by \code{colnames}.
Since row names are irrelevant, it is recommended to use \code{colnames} directly.
}
\examples{
data(agaricus.train, package='xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label)
dimnames(dtrain)
colnames(dtrain)
colnames(dtrain) <- make.names(1:ncol(train$data))
print(dtrain, verbose=TRUE)
}

View File

@ -1,27 +1,26 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/getinfo.xgb.DMatrix.R
\docType{methods}
% Please edit documentation in R/xgb.DMatrix.R
\name{getinfo}
\alias{getinfo}
\alias{getinfo,xgb.DMatrix-method}
\alias{getinfo.xgb.DMatrix}
\title{Get information of an xgb.DMatrix object}
\usage{
getinfo(object, ...)
\S4method{getinfo}{xgb.DMatrix}(object, name)
\method{getinfo}{xgb.DMatrix}(object, name)
}
\arguments{
\item{object}{Object of class \code{xgb.DMatrix}}
\item{...}{other parameters}
\item{name}{the name of the field to get}
\item{name}{the name of the information field to get (see details)}
}
\description{
Get information of an xgb.DMatrix object
}
\details{
The information can be one of the following:
The \code{name} field can be one of the following:
\itemize{
\item \code{label}: label Xgboost learn from ;
@ -34,8 +33,10 @@ The information can be one of the following:
data(agaricus.train, package='xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label)
labels <- getinfo(dtrain, 'label')
setinfo(dtrain, 'label', 1-labels)
labels2 <- getinfo(dtrain, 'label')
stopifnot(all(labels2 == 1-labels))
}

View File

@ -1,23 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nrow.xgb.DMatrix.R
\docType{methods}
\name{nrow,xgb.DMatrix-method}
\alias{nrow,xgb.DMatrix-method}
\title{Number of xgb.DMatrix rows}
\usage{
\S4method{nrow}{xgb.DMatrix}(x)
}
\arguments{
\item{x}{Object of class \code{xgb.DMatrix}}
}
\description{
\code{nrow} return the number of rows present in the \code{xgb.DMatrix}.
}
\examples{
data(agaricus.train, package='xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label)
stopifnot(nrow(dtrain) == nrow(train$data))
}

View File

@ -1,18 +0,0 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.xgb.Booster.handle.R
\docType{methods}
\name{predict,xgb.Booster.handle-method}
\alias{predict,xgb.Booster.handle-method}
\title{Predict method for eXtreme Gradient Boosting model handle}
\usage{
\S4method{predict}{xgb.Booster.handle}(object, ...)
}
\arguments{
\item{object}{Object of class "xgb.Boost.handle"}
\item{...}{Parameters pass to \code{predict.xgb.Booster}}
}
\description{
Predicted values based on xgb.Booster.handle object.
}

View File

@ -1,15 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/predict.xgb.Booster.R
\docType{methods}
\name{predict,xgb.Booster-method}
\alias{predict,xgb.Booster-method}
% Please edit documentation in R/xgb.Booster.R
\name{predict.xgb.Booster}
\alias{predict.xgb.Booster}
\alias{predict.xgb.Booster.handle}
\title{Predict method for eXtreme Gradient Boosting model}
\usage{
\S4method{predict}{xgb.Booster}(object, newdata, missing = NA,
\method{predict}{xgb.Booster}(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE)
\method{predict}{xgb.Booster.handle}(object, ...)
}
\arguments{
\item{object}{Object of class "xgb.Boost"}
\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}}
\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or
\code{xgb.DMatrix}.}
@ -27,9 +29,11 @@ only valid for gbtree, but not for gblinear. set it to be value bigger
than 0. It will use all trees by default.}
\item{predleaf}{whether predict leaf index instead. If set to TRUE, the output will be a matrix object.}
\item{...}{Parameters pass to \code{predict.xgb.Booster}}
}
\description{
Predicted values based on xgboost model object.
Predicted values based on either xgboost model or model handle object.
}
\details{
The option \code{ntreelimit} purpose is to let the user train a model with lots
@ -46,6 +50,7 @@ data(agaricus.train, package='xgboost')
data(agaricus.test, package='xgboost')
train <- agaricus.train
test <- agaricus.test
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
pred <- predict(bst, test$data)

View File

@ -0,0 +1,28 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.DMatrix.R
\name{print.xgb.DMatrix}
\alias{print.xgb.DMatrix}
\title{Print xgb.DMatrix}
\usage{
print.xgb.DMatrix(x, verbose = FALSE, ...)
}
\arguments{
\item{x}{an xgb.DMatrix object}
\item{verbose}{whether to print colnames (when present)}
\item{...}{not currently used}
}
\description{
Print information about xgb.DMatrix.
Currently it displays dimensions and presence of info-fields and colnames.
}
\examples{
data(agaricus.train, package='xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label)
dtrain
print(dtrain, verbose=TRUE)
}

View File

@ -1,14 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/setinfo.xgb.DMatrix.R
\docType{methods}
% Please edit documentation in R/xgb.DMatrix.R
\name{setinfo}
\alias{setinfo}
\alias{setinfo,xgb.DMatrix-method}
\alias{setinfo.xgb.DMatrix}
\title{Set information of an xgb.DMatrix object}
\usage{
setinfo(object, ...)
\S4method{setinfo}{xgb.DMatrix}(object, name, info)
\method{setinfo}{xgb.DMatrix}(object, name, info)
}
\arguments{
\item{object}{Object of class "xgb.DMatrix"}
@ -23,7 +22,7 @@ setinfo(object, ...)
Set information of an xgb.DMatrix object
}
\details{
It can be one of the following:
The \code{name} field can be one of the following:
\itemize{
\item \code{label}: label Xgboost learn from ;
@ -36,9 +35,10 @@ It can be one of the following:
data(agaricus.train, package='xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label)
labels <- getinfo(dtrain, 'label')
setinfo(dtrain, 'label', 1-labels)
labels2 <- getinfo(dtrain, 'label')
stopifnot(all(labels2 == 1-labels))
stopifnot(all.equal(labels2, 1-labels))
}

View File

@ -1,22 +1,26 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/slice.xgb.DMatrix.R
\docType{methods}
% Please edit documentation in R/xgb.DMatrix.R
\name{slice}
\alias{[.xgb.DMatrix}
\alias{slice}
\alias{slice,xgb.DMatrix-method}
\alias{slice.xgb.DMatrix}
\title{Get a new DMatrix containing the specified rows of
orginal xgb.DMatrix object}
\usage{
slice(object, ...)
\S4method{slice}{xgb.DMatrix}(object, idxset, ...)
\method{slice}{xgb.DMatrix}(object, idxset, ...)
\method{[}{xgb.DMatrix}(object, idxset, colset = NULL)
}
\arguments{
\item{object}{Object of class "xgb.DMatrix"}
\item{...}{other parameters}
\item{...}{other parameters (currently not used)}
\item{idxset}{a integer vector of indices of rows needed}
\item{colset}{currently not used (columns subsetting is not available)}
}
\description{
Get a new DMatrix containing the specified rows of
@ -26,6 +30,12 @@ orginal xgb.DMatrix object
data(agaricus.train, package='xgboost')
train <- agaricus.train
dtrain <- xgb.DMatrix(train$data, label=train$label)
dsub <- slice(dtrain, 1:3)
dsub <- slice(dtrain, 1:42)
labels1 <- getinfo(dsub, 'label')
dsub <- dtrain[1:42, ]
labels2 <- getinfo(dsub, 'label')
all.equal(labels1, labels2)
}

View File

@ -7,8 +7,7 @@
xgb.DMatrix(data, info = list(), missing = NA, ...)
}
\arguments{
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character
indicating the data file.}
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
\item{info}{a list of information of the xgb.DMatrix object}
@ -18,7 +17,8 @@ value that represents missing value. Sometime a data use 0 or other extreme valu
\item{...}{other information to pass to \code{info}.}
}
\description{
Contruct xgb.DMatrix object from dense matrix, sparse matrix or local file.
Contruct xgb.DMatrix object from dense matrix, sparse matrix
or local file (that was created previously by saving an \code{xgb.DMatrix}).
}
\examples{
data(agaricus.train, package='xgboost')

View File

@ -195,6 +195,14 @@ SEXP XGDMatrixNumRow_R(SEXP handle) {
return ScalarInteger(static_cast<int>(nrow));
}
SEXP XGDMatrixNumCol_R(SEXP handle) {
bst_ulong ncol;
R_API_BEGIN();
CHECK_CALL(XGDMatrixNumCol(R_ExternalPtrAddr(handle), &ncol));
R_API_END();
return ScalarInteger(static_cast<int>(ncol));
}
// functions related to booster
void _BoosterFinalizer(SEXP ext) {
if (R_ExternalPtrAddr(ext) == NULL) return;

View File

@ -83,10 +83,16 @@ XGB_DLL SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field);
/*!
* \brief return number of rows
* \param handle a instance of data matrix
* \param handle an instance of data matrix
*/
XGB_DLL SEXP XGDMatrixNumRow_R(SEXP handle);
/*!
* \brief return number of columns
* \param handle an instance of data matrix
*/
XGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle);
/*!
* \brief create xgboost learner
* \param dmats a list of dmatrix handles that will be cached

View File

@ -0,0 +1,67 @@
require(xgboost)
context("testing xgb.DMatrix functionality")
data(agaricus.test, package='xgboost')
test_data <- agaricus.test$data[1:100,]
test_label <- agaricus.test$label[1:100]
test_that("xgb.DMatrix: basic construction, saving, loading", {
# from sparse matrix
dtest1 <- xgb.DMatrix(test_data, label=test_label)
# from dense matrix
dtest2 <- xgb.DMatrix(as.matrix(test_data), label=test_label)
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label'))
# save to a local file
tmp_file <- tempfile('xgb.DMatrix_')
expect_true(xgb.DMatrix.save(dtest1, tmp_file))
# read from a local file
dtest3 <- xgb.DMatrix(tmp_file)
unlink(tmp_file)
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label'))
})
test_that("xgb.DMatrix: getinfo & setinfo", {
dtest <- xgb.DMatrix(test_data)
expect_true(setinfo(dtest, 'label', test_label))
labels <- getinfo(dtest, 'label')
expect_equal(test_label, getinfo(dtest, 'label'))
expect_true(length(getinfo(dtest, 'weight')) == 0)
expect_true(length(getinfo(dtest, 'base_margin')) == 0)
expect_true(setinfo(dtest, 'weight', test_label))
expect_true(setinfo(dtest, 'base_margin', test_label))
expect_true(setinfo(dtest, 'group', c(50,50)))
expect_error(setinfo(dtest, 'group', test_label))
# providing character values will give a warning
expect_warning( setinfo(dtest, 'weight', rep('a', nrow(test_data))) )
# any other label should error
expect_error(setinfo(dtest, 'asdf', test_label))
})
test_that("xgb.DMatrix: slice, dim", {
dtest <- xgb.DMatrix(test_data, label=test_label)
expect_equal(dim(dtest), dim(test_data))
dsub1 <- slice(dtest, 1:42)
expect_equal(nrow(dsub1), 42)
expect_equal(ncol(dsub1), ncol(test_data))
dsub2 <- dtest[1:42,]
expect_equal(dim(dtest), dim(test_data))
expect_equal(getinfo(dsub1, 'label'), getinfo(dsub2, 'label'))
})
test_that("xgb.DMatrix: colnames", {
dtest <- xgb.DMatrix(test_data, label=test_label)
expect_equal(colnames(dtest), colnames(test_data))
expect_error( colnames(dtest) <- 'asdf')
new_names <- make.names(1:ncol(test_data))
expect_silent( colnames(dtest) <- new_names)
expect_equal(colnames(dtest), new_names)
expect_silent(colnames(dtest) <- NULL)
expect_null(colnames(dtest))
})