convert S4 to S3; add some extra methods to DMatrix
This commit is contained in:
parent
d27bfb61b0
commit
71f402ac16
@ -1,6 +1,16 @@
|
||||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
S3method("[",xgb.DMatrix)
|
||||
S3method("dimnames<-",xgb.DMatrix)
|
||||
S3method(dim,xgb.DMatrix)
|
||||
S3method(dimnames,xgb.DMatrix)
|
||||
S3method(getinfo,xgb.DMatrix)
|
||||
S3method(predict,xgb.Booster)
|
||||
S3method(predict,xgb.Booster.handle)
|
||||
S3method(setinfo,xgb.DMatrix)
|
||||
S3method(slice,xgb.DMatrix)
|
||||
export(getinfo)
|
||||
export(print.xgb.DMatrix)
|
||||
export(setinfo)
|
||||
export(slice)
|
||||
export(xgb.DMatrix)
|
||||
@ -19,8 +29,6 @@ export(xgb.save)
|
||||
export(xgb.save.raw)
|
||||
export(xgb.train)
|
||||
export(xgboost)
|
||||
exportMethods(nrow)
|
||||
exportMethods(predict)
|
||||
import(methods)
|
||||
importClassesFrom(Matrix,dgCMatrix)
|
||||
importClassesFrom(Matrix,dgeMatrix)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
#' @importClassesFrom Matrix dgCMatrix dgeMatrix
|
||||
#' @importClassesFrom Matrix dgCMatrix dgeMatrix
|
||||
#' @import methods
|
||||
|
||||
# depends on matrix
|
||||
@ -9,131 +9,10 @@
|
||||
library.dynam.unload("xgboost", libpath)
|
||||
}
|
||||
|
||||
# set information into dmatrix, this mutate dmatrix
|
||||
xgb.setinfo <- function(dmat, name, info) {
|
||||
if (class(dmat) != "xgb.DMatrix") {
|
||||
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix")
|
||||
}
|
||||
if (name == "label") {
|
||||
if (length(info) != xgb.numrow(dmat))
|
||||
stop("The length of labels must equal to the number of rows in the input data")
|
||||
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
|
||||
PACKAGE = "xgboost")
|
||||
return(TRUE)
|
||||
}
|
||||
if (name == "weight") {
|
||||
if (length(info) != xgb.numrow(dmat))
|
||||
stop("The length of weights must equal to the number of rows in the input data")
|
||||
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
|
||||
PACKAGE = "xgboost")
|
||||
return(TRUE)
|
||||
}
|
||||
if (name == "base_margin") {
|
||||
# if (length(info)!=xgb.numrow(dmat))
|
||||
# stop("The length of base margin must equal to the number of rows in the input data")
|
||||
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
|
||||
PACKAGE = "xgboost")
|
||||
return(TRUE)
|
||||
}
|
||||
if (name == "group") {
|
||||
if (sum(info) != xgb.numrow(dmat))
|
||||
stop("The sum of groups must equal to the number of rows in the input data")
|
||||
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info),
|
||||
PACKAGE = "xgboost")
|
||||
return(TRUE)
|
||||
}
|
||||
stop(paste("xgb.setinfo: unknown info name", name))
|
||||
return(FALSE)
|
||||
}
|
||||
|
||||
# construct a Booster from cachelist
|
||||
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
||||
if (typeof(cachelist) != "list") {
|
||||
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||
}
|
||||
for (dm in cachelist) {
|
||||
if (class(dm) != "xgb.DMatrix") {
|
||||
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||
}
|
||||
}
|
||||
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
|
||||
if (length(params) != 0) {
|
||||
for (i in 1:length(params)) {
|
||||
p <- params[i]
|
||||
.Call("XGBoosterSetParam_R", handle, gsub("\\.", "_", names(p)), as.character(p),
|
||||
PACKAGE = "xgboost")
|
||||
}
|
||||
}
|
||||
if (!is.null(modelfile)) {
|
||||
if (typeof(modelfile) == "character") {
|
||||
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
|
||||
} else if (typeof(modelfile) == "raw") {
|
||||
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
|
||||
} else {
|
||||
stop("xgb.Booster: modelfile must be character or raw vector")
|
||||
}
|
||||
}
|
||||
return(structure(handle, class = "xgb.Booster.handle"))
|
||||
}
|
||||
|
||||
# convert xgb.Booster.handle to xgb.Booster
|
||||
xgb.handleToBooster <- function(handle, raw = NULL)
|
||||
{
|
||||
bst <- list(handle = handle, raw = raw)
|
||||
class(bst) <- "xgb.Booster"
|
||||
return(bst)
|
||||
}
|
||||
|
||||
# Check whether an xgb.Booster object is complete
|
||||
xgb.Booster.check <- function(bst, saveraw = TRUE)
|
||||
{
|
||||
isnull <- is.null(bst$handle)
|
||||
if (!isnull) {
|
||||
isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost")
|
||||
}
|
||||
if (isnull) {
|
||||
bst$handle <- xgb.Booster(modelfile = bst$raw)
|
||||
} else {
|
||||
if (is.null(bst$raw) && saveraw)
|
||||
bst$raw <- xgb.save.raw(bst$handle)
|
||||
}
|
||||
return(bst)
|
||||
}
|
||||
|
||||
## ----the following are low level iteratively function, not needed if
|
||||
## ----the following are low level iterative functions, not needed if
|
||||
## you do not want to use them ---------------------------------------
|
||||
# get dmatrix from data, label
|
||||
xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
|
||||
inClass <- class(data)
|
||||
if (inClass == "dgCMatrix" || inClass == "matrix") {
|
||||
if (is.null(label)) {
|
||||
stop("xgboost: need label when data is a matrix")
|
||||
}
|
||||
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
|
||||
if (!is.null(weight)){
|
||||
xgb.setinfo(dtrain, "weight", weight)
|
||||
}
|
||||
} else {
|
||||
if (!is.null(label)) {
|
||||
warning("xgboost: label will be ignored.")
|
||||
}
|
||||
if (inClass == "character") {
|
||||
dtrain <- xgb.DMatrix(data)
|
||||
} else if (inClass == "xgb.DMatrix") {
|
||||
dtrain <- data
|
||||
} else if (inClass == "data.frame") {
|
||||
stop("xgboost only support numerical matrix input,
|
||||
use 'data.matrix' to transform the data.")
|
||||
} else {
|
||||
stop("xgboost: Invalid input of data")
|
||||
}
|
||||
}
|
||||
return (dtrain)
|
||||
}
|
||||
xgb.numrow <- function(dmat) {
|
||||
nrow <- .Call("XGDMatrixNumRow_R", dmat, PACKAGE="xgboost")
|
||||
return(nrow)
|
||||
}
|
||||
|
||||
# iteratively update booster with customized statistics
|
||||
xgb.iter.boost <- function(booster, dtrain, gpair) {
|
||||
if (class(booster) != "xgb.Booster.handle") {
|
||||
@ -227,7 +106,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
|
||||
"\tConsider providing pre-computed CV-folds through the folds parameter.")
|
||||
}
|
||||
y <- getinfo(dall, 'label')
|
||||
randidx <- sample(1 : xgb.numrow(dall))
|
||||
randidx <- sample(1 : nrow(dall))
|
||||
if (stratified & length(y) == length(randidx)) {
|
||||
y <- y[randidx]
|
||||
#
|
||||
|
||||
144
R-package/R/xgb.Booster.R
Normal file
144
R-package/R/xgb.Booster.R
Normal file
@ -0,0 +1,144 @@
|
||||
# Construct a Booster from cachelist
|
||||
# internal utility function
|
||||
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
||||
if (typeof(cachelist) != "list") {
|
||||
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||
}
|
||||
for (dm in cachelist) {
|
||||
if (class(dm) != "xgb.DMatrix") {
|
||||
stop("xgb.Booster: only accepts list of DMatrix as cachelist")
|
||||
}
|
||||
}
|
||||
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
|
||||
if (length(params) != 0) {
|
||||
for (i in 1:length(params)) {
|
||||
p <- params[i]
|
||||
.Call("XGBoosterSetParam_R", handle, gsub("\\.", "_", names(p)), as.character(p),
|
||||
PACKAGE = "xgboost")
|
||||
}
|
||||
}
|
||||
if (!is.null(modelfile)) {
|
||||
if (typeof(modelfile) == "character") {
|
||||
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
|
||||
} else if (typeof(modelfile) == "raw") {
|
||||
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
|
||||
} else {
|
||||
stop("xgb.Booster: modelfile must be character or raw vector")
|
||||
}
|
||||
}
|
||||
return(structure(handle, class = "xgb.Booster.handle"))
|
||||
}
|
||||
|
||||
# Convert xgb.Booster.handle to xgb.Booster
|
||||
# internal utility function
|
||||
xgb.handleToBooster <- function(handle, raw = NULL)
|
||||
{
|
||||
bst <- list(handle = handle, raw = raw)
|
||||
class(bst) <- "xgb.Booster"
|
||||
return(bst)
|
||||
}
|
||||
|
||||
# Check whether an xgb.Booster object is complete
|
||||
# internal utility function
|
||||
xgb.Booster.check <- function(bst, saveraw = TRUE)
|
||||
{
|
||||
isnull <- is.null(bst$handle)
|
||||
if (!isnull) {
|
||||
isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost")
|
||||
}
|
||||
if (isnull) {
|
||||
bst$handle <- xgb.Booster(modelfile = bst$raw)
|
||||
} else {
|
||||
if (is.null(bst$raw) && saveraw)
|
||||
bst$raw <- xgb.save.raw(bst$handle)
|
||||
}
|
||||
return(bst)
|
||||
}
|
||||
|
||||
#' Predict method for eXtreme Gradient Boosting model
|
||||
#'
|
||||
#' Predicted values based on either xgboost model or model handle object.
|
||||
#'
|
||||
#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}
|
||||
#' @param newdata takes \code{matrix}, \code{dgCMatrix}, local data file or
|
||||
#' \code{xgb.DMatrix}.
|
||||
#' @param missing Missing is only used when input is dense matrix, pick a float
|
||||
#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
|
||||
#' @param outputmargin whether the prediction should be shown in the original
|
||||
#' value of sum of functions, when outputmargin=TRUE, the prediction is
|
||||
#' untransformed margin value. In logistic regression, outputmargin=T will
|
||||
#' output value before logistic transformation.
|
||||
#' @param ntreelimit limit number of trees used in prediction, this parameter is
|
||||
#' only valid for gbtree, but not for gblinear. set it to be value bigger
|
||||
#' than 0. It will use all trees by default.
|
||||
#' @param predleaf whether predict leaf index instead. If set to TRUE, the output will be a matrix object.
|
||||
#' @param ... Parameters pass to \code{predict.xgb.Booster}
|
||||
#'
|
||||
#' @details
|
||||
#' The option \code{ntreelimit} purpose is to let the user train a model with lots
|
||||
#' of trees but use only the first trees for prediction to avoid overfitting
|
||||
#' (without having to train a new model with less trees).
|
||||
#'
|
||||
#' The option \code{predleaf} purpose is inspired from §3.1 of the paper
|
||||
#' \code{Practical Lessons from Predicting Clicks on Ads at Facebook}.
|
||||
#' The idea is to use the model as a generator of new features which capture non linear link
|
||||
#' from original features.
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' data(agaricus.test, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' test <- agaricus.test
|
||||
#'
|
||||
#' bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
||||
#' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
|
||||
#' pred <- predict(bst, test$data)
|
||||
#' @rdname predict.xgb.Booster
|
||||
#' @export
|
||||
predict.xgb.Booster <- function(object, newdata, missing = NA,
|
||||
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {
|
||||
if (class(object) != "xgb.Booster"){
|
||||
stop("predict: model in prediction must be of class xgb.Booster")
|
||||
} else {
|
||||
object <- xgb.Booster.check(object, saveraw = FALSE)
|
||||
}
|
||||
if (class(newdata) != "xgb.DMatrix") {
|
||||
newdata <- xgb.DMatrix(newdata, missing = missing)
|
||||
}
|
||||
if (is.null(ntreelimit)) {
|
||||
ntreelimit <- 0
|
||||
} else {
|
||||
if (ntreelimit < 1){
|
||||
stop("predict: ntreelimit must be equal to or greater than 1")
|
||||
}
|
||||
}
|
||||
option <- 0
|
||||
if (outputmargin) {
|
||||
option <- option + 1
|
||||
}
|
||||
if (predleaf) {
|
||||
option <- option + 2
|
||||
}
|
||||
ret <- .Call("XGBoosterPredict_R", object$handle, newdata, as.integer(option),
|
||||
as.integer(ntreelimit), PACKAGE = "xgboost")
|
||||
if (predleaf){
|
||||
len <- getinfo(newdata, "nrow")
|
||||
if (length(ret) == len){
|
||||
ret <- matrix(ret,ncol = 1)
|
||||
} else {
|
||||
ret <- matrix(ret, ncol = len)
|
||||
ret <- t(ret)
|
||||
}
|
||||
}
|
||||
return(ret)
|
||||
}
|
||||
|
||||
#' @rdname predict.xgb.Booster
|
||||
#' @export
|
||||
predict.xgb.Booster.handle <- function(object, ...) {
|
||||
|
||||
bst <- xgb.handleToBooster(object)
|
||||
|
||||
ret <- predict(bst, ...)
|
||||
return(ret)
|
||||
}
|
||||
@ -1,9 +1,9 @@
|
||||
#' Contruct xgb.DMatrix object
|
||||
#'
|
||||
#' Contruct xgb.DMatrix object from dense matrix, sparse matrix or local file.
|
||||
#' Contruct xgb.DMatrix object from dense matrix, sparse matrix
|
||||
#' or local file (that was created previously by saving an \code{xgb.DMatrix}).
|
||||
#'
|
||||
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character
|
||||
#' indicating the data file.
|
||||
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
|
||||
#' @param info a list of information of the xgb.DMatrix object
|
||||
#' @param missing Missing is only used when input is dense matrix, pick a float
|
||||
#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
|
||||
@ -18,27 +18,339 @@
|
||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||
#' @export
|
||||
xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
|
||||
cnames <- NULL
|
||||
if (typeof(data) == "character") {
|
||||
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
|
||||
PACKAGE = "xgboost")
|
||||
} else if (is.matrix(data)) {
|
||||
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
|
||||
PACKAGE = "xgboost")
|
||||
cnames <- colnames(data)
|
||||
} else if (class(data) == "dgCMatrix") {
|
||||
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x,
|
||||
PACKAGE = "xgboost")
|
||||
cnames <- colnames(data)
|
||||
} else {
|
||||
stop(paste("xgb.DMatrix: does not support to construct from ",
|
||||
typeof(data)))
|
||||
}
|
||||
dmat <- structure(handle, class = "xgb.DMatrix")
|
||||
dmat <- handle
|
||||
attributes(dmat) <- list(.Dimnames = list(NULL, cnames), class = "xgb.DMatrix")
|
||||
#dmat <- list(handle = handle, colnames = cnames)
|
||||
#attr(dmat, 'class') <- "xgb.DMatrix"
|
||||
|
||||
info <- append(info, list(...))
|
||||
if (length(info) == 0)
|
||||
return(dmat)
|
||||
for (i in 1:length(info)) {
|
||||
p <- info[i]
|
||||
xgb.setinfo(dmat, names(p), p[[1]])
|
||||
setinfo(dmat, names(p), p[[1]])
|
||||
}
|
||||
return(dmat)
|
||||
}
|
||||
|
||||
|
||||
# get dmatrix from data, label
|
||||
# internal helper method
|
||||
xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
|
||||
inClass <- class(data)
|
||||
if (inClass == "dgCMatrix" || inClass == "matrix") {
|
||||
if (is.null(label)) {
|
||||
stop("xgboost: need label when data is a matrix")
|
||||
}
|
||||
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
|
||||
if (!is.null(weight)){
|
||||
setinfo(dtrain, "weight", weight)
|
||||
}
|
||||
} else {
|
||||
if (!is.null(label)) {
|
||||
warning("xgboost: label will be ignored.")
|
||||
}
|
||||
if (inClass == "character") {
|
||||
dtrain <- xgb.DMatrix(data)
|
||||
} else if (inClass == "xgb.DMatrix") {
|
||||
dtrain <- data
|
||||
} else if (inClass == "data.frame") {
|
||||
stop("xgboost only support numerical matrix input,
|
||||
use 'data.matrix' to transform the data.")
|
||||
} else {
|
||||
stop("xgboost: Invalid input of data")
|
||||
}
|
||||
}
|
||||
return (dtrain)
|
||||
}
|
||||
|
||||
|
||||
#' Dimensions of xgb.DMatrix
|
||||
#'
|
||||
#' Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}.
|
||||
#' @param x Object of class \code{xgb.DMatrix}
|
||||
#'
|
||||
#' @details
|
||||
#' Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
|
||||
#' be directly used with an \code{xgb.DMatrix} object.
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#'
|
||||
#' stopifnot(nrow(dtrain) == nrow(train$data))
|
||||
#' stopifnot(ncol(dtrain) == ncol(train$data))
|
||||
#' stopifnot(all(dim(dtrain) == dim(train$data)))
|
||||
#'
|
||||
#' @export
|
||||
dim.xgb.DMatrix <- function(x) {
|
||||
c(.Call("XGDMatrixNumRow_R", x, PACKAGE="xgboost"),
|
||||
.Call("XGDMatrixNumCol_R", x, PACKAGE="xgboost"))
|
||||
}
|
||||
|
||||
|
||||
#' Handling of column names of \code{xgb.DMatrix}
|
||||
#'
|
||||
#' Only column names are supported for \code{xgb.DMatrix}, thus setting of
|
||||
#' row names would have no effect and returnten row names would be NULL.
|
||||
#'
|
||||
#' @param x object of class \code{xgb.DMatrix}
|
||||
#' @param value a list of two elements: the first one is ignored
|
||||
#' and the second one is column names
|
||||
#'
|
||||
#' @details
|
||||
#' Generic \code{dimnames} methods are used by \code{colnames}.
|
||||
#' Since row names are irrelevant, it is recommended to use \code{colnames} directly.
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#' dimnames(dtrain)
|
||||
#' colnames(dtrain)
|
||||
#' colnames(dtrain) <- make.names(1:ncol(train$data))
|
||||
#' print(dtrain, verbose=TRUE)
|
||||
#'
|
||||
#' @rdname dimnames.xgb.DMatrix
|
||||
#' @export
|
||||
dimnames.xgb.DMatrix <- function(x) {
|
||||
attr(x, '.Dimnames')
|
||||
}
|
||||
|
||||
#' @rdname dimnames.xgb.DMatrix
|
||||
#' @export
|
||||
`dimnames<-.xgb.DMatrix` <- function(x, value) {
|
||||
if (!is.list(value) || length(value) != 2L)
|
||||
stop("invalid 'dimnames' given: must be a list of two elements")
|
||||
if (!is.null(value[[1L]]))
|
||||
stop("xgb.DMatrix does not have rownames")
|
||||
if (is.null(value[[2]])) {
|
||||
attr(x, '.Dimnames') <- NULL
|
||||
return(x)
|
||||
}
|
||||
if (ncol(x) != length(value[[2]]))
|
||||
stop("can't assign ", length(value[[2]]), " colnames to a ",
|
||||
ncol(x), " column xgb.DMatrix")
|
||||
attr(x, '.Dimnames') <- value
|
||||
x
|
||||
}
|
||||
|
||||
|
||||
#' Get information of an xgb.DMatrix object
|
||||
#'
|
||||
#' Get information of an xgb.DMatrix object
|
||||
#' @param object Object of class \code{xgb.DMatrix}
|
||||
#' @param name the name of the information field to get (see details)
|
||||
#' @param ... other parameters
|
||||
#'
|
||||
#' @details
|
||||
#' The \code{name} field can be one of the following:
|
||||
#'
|
||||
#' \itemize{
|
||||
#' \item \code{label}: label Xgboost learn from ;
|
||||
#' \item \code{weight}: to do a weight rescale ;
|
||||
#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ;
|
||||
#' \item \code{nrow}: number of rows of the \code{xgb.DMatrix}.
|
||||
#' }
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#'
|
||||
#' labels <- getinfo(dtrain, 'label')
|
||||
#' setinfo(dtrain, 'label', 1-labels)
|
||||
#'
|
||||
#' labels2 <- getinfo(dtrain, 'label')
|
||||
#' stopifnot(all(labels2 == 1-labels))
|
||||
#' @rdname getinfo
|
||||
#' @export
|
||||
getinfo <- function(object, ...) UseMethod("getinfo")
|
||||
|
||||
#' @rdname getinfo
|
||||
#' @export
|
||||
getinfo.xgb.DMatrix <- function(object, name) {
|
||||
if (typeof(name) != "character") {
|
||||
stop("getinfo: name must be character")
|
||||
}
|
||||
if (name != "label" && name != "weight" &&
|
||||
name != "base_margin" && name != "nrow") {
|
||||
stop(paste("getinfo: unknown info name", name))
|
||||
}
|
||||
if (name != "nrow"){
|
||||
ret <- .Call("XGDMatrixGetInfo_R", object, name, PACKAGE = "xgboost")
|
||||
} else {
|
||||
ret <- nrow(object)
|
||||
}
|
||||
return(ret)
|
||||
}
|
||||
|
||||
|
||||
#' Set information of an xgb.DMatrix object
|
||||
#'
|
||||
#' Set information of an xgb.DMatrix object
|
||||
#'
|
||||
#' @param object Object of class "xgb.DMatrix"
|
||||
#' @param name the name of the field to get
|
||||
#' @param info the specific field of information to set
|
||||
#' @param ... other parameters
|
||||
#'
|
||||
#' @details
|
||||
#' The \code{name} field can be one of the following:
|
||||
#'
|
||||
#' \itemize{
|
||||
#' \item \code{label}: label Xgboost learn from ;
|
||||
#' \item \code{weight}: to do a weight rescale ;
|
||||
#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ;
|
||||
#' \item \code{group}.
|
||||
#' }
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#'
|
||||
#' labels <- getinfo(dtrain, 'label')
|
||||
#' setinfo(dtrain, 'label', 1-labels)
|
||||
#' labels2 <- getinfo(dtrain, 'label')
|
||||
#' stopifnot(all.equal(labels2, 1-labels))
|
||||
#' @rdname setinfo
|
||||
#' @export
|
||||
setinfo <- function(object, ...) UseMethod("setinfo")
|
||||
|
||||
#' @rdname setinfo
|
||||
#' @export
|
||||
setinfo.xgb.DMatrix <- function(object, name, info) {
|
||||
if (name == "label") {
|
||||
if (length(info) != nrow(object))
|
||||
stop("The length of labels must equal to the number of rows in the input data")
|
||||
.Call("XGDMatrixSetInfo_R", object, name, as.numeric(info),
|
||||
PACKAGE = "xgboost")
|
||||
return(TRUE)
|
||||
}
|
||||
if (name == "weight") {
|
||||
if (length(info) != nrow(object))
|
||||
stop("The length of weights must equal to the number of rows in the input data")
|
||||
.Call("XGDMatrixSetInfo_R", object, name, as.numeric(info),
|
||||
PACKAGE = "xgboost")
|
||||
return(TRUE)
|
||||
}
|
||||
if (name == "base_margin") {
|
||||
# if (length(info)!=nrow(object))
|
||||
# stop("The length of base margin must equal to the number of rows in the input data")
|
||||
.Call("XGDMatrixSetInfo_R", object, name, as.numeric(info),
|
||||
PACKAGE = "xgboost")
|
||||
return(TRUE)
|
||||
}
|
||||
if (name == "group") {
|
||||
if (sum(info) != nrow(object))
|
||||
stop("The sum of groups must equal to the number of rows in the input data")
|
||||
.Call("XGDMatrixSetInfo_R", object, name, as.integer(info),
|
||||
PACKAGE = "xgboost")
|
||||
return(TRUE)
|
||||
}
|
||||
stop(paste("setinfo: unknown info name", name))
|
||||
return(FALSE)
|
||||
}
|
||||
|
||||
|
||||
#' Get a new DMatrix containing the specified rows of
|
||||
#' orginal xgb.DMatrix object
|
||||
#'
|
||||
#' Get a new DMatrix containing the specified rows of
|
||||
#' orginal xgb.DMatrix object
|
||||
#'
|
||||
#' @param object Object of class "xgb.DMatrix"
|
||||
#' @param idxset a integer vector of indices of rows needed
|
||||
#' @param colset currently not used (columns subsetting is not available)
|
||||
#' @param ... other parameters (currently not used)
|
||||
#'
|
||||
#' @examples
|
||||
#' data(agaricus.train, package='xgboost')
|
||||
#' train <- agaricus.train
|
||||
#' dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
#'
|
||||
#' dsub <- slice(dtrain, 1:42)
|
||||
#' labels1 <- getinfo(dsub, 'label')
|
||||
#' dsub <- dtrain[1:42, ]
|
||||
#' labels2 <- getinfo(dsub, 'label')
|
||||
#' all.equal(labels1, labels2)
|
||||
#'
|
||||
#' @rdname slice.xgb.DMatrix
|
||||
#' @export
|
||||
slice <- function(object, ...) UseMethod("slice")
|
||||
|
||||
#' @rdname slice.xgb.DMatrix
|
||||
#' @export
|
||||
slice.xgb.DMatrix <- function(object, idxset, ...) {
|
||||
if (class(object) != "xgb.DMatrix") {
|
||||
stop("slice: first argument dtrain must be xgb.DMatrix")
|
||||
}
|
||||
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, PACKAGE = "xgboost")
|
||||
|
||||
attr_list <- attributes(object)
|
||||
nr <- nrow(object)
|
||||
len <- sapply(attr_list, length)
|
||||
ind <- which(len == nr)
|
||||
if (length(ind) > 0) {
|
||||
nms <- names(attr_list)[ind]
|
||||
for (i in 1:length(ind)) {
|
||||
attr(ret, nms[i]) <- attr(object, nms[i])[idxset]
|
||||
}
|
||||
}
|
||||
return(structure(ret, class = "xgb.DMatrix"))
|
||||
}
|
||||
|
||||
#' @rdname slice.xgb.DMatrix
|
||||
#' @export
|
||||
`[.xgb.DMatrix` <- function(object, idxset, colset=NULL) {
|
||||
slice(object, idxset)
|
||||
}
|
||||
|
||||
|
||||
#' Print xgb.DMatrix
|
||||
#'
|
||||
#' Print information about an xgb.DMatrix.
|
||||
#' Currently is would display
|
||||
#' @param x an xgb.DMatrix object
|
||||
#' @param verbose whether to print colnames (when present)
|
||||
#'
|
||||
#' @export
|
||||
print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
|
||||
cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ')
|
||||
infos <- c()
|
||||
if(length(getinfo(x, 'label')) > 0) infos <- 'label'
|
||||
if(length(getinfo(x, 'weight')) > 0) infos <- c(infos, 'weight')
|
||||
if(length(getinfo(x, 'base_margin')) > 0) infos <- c(infos, 'base_margin')
|
||||
if (length(infos) == 0) infos <- 'NA'
|
||||
cat(infos)
|
||||
cnames <- colnames(x)
|
||||
cat(' colnames:')
|
||||
if (verbose & !is.null(cnames)) {
|
||||
cat("\n'")
|
||||
cat(cnames, sep="','")
|
||||
cat("'")
|
||||
} else {
|
||||
if (is.null(cnames)) cat(' no')
|
||||
else cat(' yes')
|
||||
}
|
||||
cat("\n")
|
||||
invisible(x)
|
||||
}
|
||||
|
||||
@ -169,11 +169,11 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
||||
num_class <- params[['num_class']]
|
||||
if (is.null(num_class))
|
||||
stop('must set num_class to use softmax')
|
||||
predictValues <- matrix(0,xgb.numrow(dtrain),num_class)
|
||||
predictValues <- matrix(0, nrow(dtrain), num_class)
|
||||
mat_pred <- TRUE
|
||||
}
|
||||
else
|
||||
predictValues <- rep(0,xgb.numrow(dtrain))
|
||||
predictValues <- rep(0, nrow(dtrain))
|
||||
history <- c()
|
||||
print.every.n <- max(as.integer(print.every.n), 1L)
|
||||
for (i in 1:nrounds) {
|
||||
|
||||
29
R-package/man/dim.xgb.DMatrix.Rd
Normal file
29
R-package/man/dim.xgb.DMatrix.Rd
Normal file
@ -0,0 +1,29 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.DMatrix.R
|
||||
\name{dim.xgb.DMatrix}
|
||||
\alias{dim.xgb.DMatrix}
|
||||
\title{Dimensions of xgb.DMatrix}
|
||||
\usage{
|
||||
\method{dim}{xgb.DMatrix}(x)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{Object of class \code{xgb.DMatrix}}
|
||||
}
|
||||
\description{
|
||||
Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}.
|
||||
}
|
||||
\details{
|
||||
Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also
|
||||
be directly used with an \code{xgb.DMatrix} object.
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
train <- agaricus.train
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
|
||||
stopifnot(nrow(dtrain) == nrow(train$data))
|
||||
stopifnot(ncol(dtrain) == ncol(train$data))
|
||||
stopifnot(all(dim(dtrain) == dim(train$data)))
|
||||
|
||||
}
|
||||
|
||||
36
R-package/man/dimnames.xgb.DMatrix.Rd
Normal file
36
R-package/man/dimnames.xgb.DMatrix.Rd
Normal file
@ -0,0 +1,36 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.DMatrix.R
|
||||
\name{dimnames.xgb.DMatrix}
|
||||
\alias{dimnames.xgb.DMatrix}
|
||||
\alias{dimnames<-.xgb.DMatrix}
|
||||
\title{Handling of column names of \code{xgb.DMatrix}}
|
||||
\usage{
|
||||
\method{dimnames}{xgb.DMatrix}(x)
|
||||
|
||||
\method{dimnames}{xgb.DMatrix}(x) <- value
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{object of class \code{xgb.DMatrix}}
|
||||
|
||||
\item{value}{a list of two elements: the first one is ignored
|
||||
and the second one is column names}
|
||||
}
|
||||
\description{
|
||||
Only column names are supported for \code{xgb.DMatrix}, thus setting of
|
||||
row names would have no effect and returnten row names would be NULL.
|
||||
}
|
||||
\details{
|
||||
Generic \code{dimnames} methods are used by \code{colnames}.
|
||||
Since row names are irrelevant, it is recommended to use \code{colnames} directly.
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
train <- agaricus.train
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
dimnames(dtrain)
|
||||
colnames(dtrain)
|
||||
colnames(dtrain) <- make.names(1:ncol(train$data))
|
||||
print(dtrain, verbose=TRUE)
|
||||
|
||||
}
|
||||
|
||||
@ -1,27 +1,26 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/getinfo.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
% Please edit documentation in R/xgb.DMatrix.R
|
||||
\name{getinfo}
|
||||
\alias{getinfo}
|
||||
\alias{getinfo,xgb.DMatrix-method}
|
||||
\alias{getinfo.xgb.DMatrix}
|
||||
\title{Get information of an xgb.DMatrix object}
|
||||
\usage{
|
||||
getinfo(object, ...)
|
||||
|
||||
\S4method{getinfo}{xgb.DMatrix}(object, name)
|
||||
\method{getinfo}{xgb.DMatrix}(object, name)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class \code{xgb.DMatrix}}
|
||||
|
||||
\item{...}{other parameters}
|
||||
|
||||
\item{name}{the name of the field to get}
|
||||
\item{name}{the name of the information field to get (see details)}
|
||||
}
|
||||
\description{
|
||||
Get information of an xgb.DMatrix object
|
||||
}
|
||||
\details{
|
||||
The information can be one of the following:
|
||||
The \code{name} field can be one of the following:
|
||||
|
||||
\itemize{
|
||||
\item \code{label}: label Xgboost learn from ;
|
||||
@ -34,8 +33,10 @@ The information can be one of the following:
|
||||
data(agaricus.train, package='xgboost')
|
||||
train <- agaricus.train
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
|
||||
labels <- getinfo(dtrain, 'label')
|
||||
setinfo(dtrain, 'label', 1-labels)
|
||||
|
||||
labels2 <- getinfo(dtrain, 'label')
|
||||
stopifnot(all(labels2 == 1-labels))
|
||||
}
|
||||
|
||||
58
R-package/man/predict.xgb.Booster.Rd
Normal file
58
R-package/man/predict.xgb.Booster.Rd
Normal file
@ -0,0 +1,58 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.Booster.R
|
||||
\name{predict.xgb.Booster}
|
||||
\alias{predict.xgb.Booster}
|
||||
\alias{predict.xgb.Booster.handle}
|
||||
\title{Predict method for eXtreme Gradient Boosting model}
|
||||
\usage{
|
||||
\method{predict}{xgb.Booster}(object, newdata, missing = NA,
|
||||
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE)
|
||||
|
||||
\method{predict}{xgb.Booster.handle}(object, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}}
|
||||
|
||||
\item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or
|
||||
\code{xgb.DMatrix}.}
|
||||
|
||||
\item{missing}{Missing is only used when input is dense matrix, pick a float
|
||||
value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.}
|
||||
|
||||
\item{outputmargin}{whether the prediction should be shown in the original
|
||||
value of sum of functions, when outputmargin=TRUE, the prediction is
|
||||
untransformed margin value. In logistic regression, outputmargin=T will
|
||||
output value before logistic transformation.}
|
||||
|
||||
\item{ntreelimit}{limit number of trees used in prediction, this parameter is
|
||||
only valid for gbtree, but not for gblinear. set it to be value bigger
|
||||
than 0. It will use all trees by default.}
|
||||
|
||||
\item{predleaf}{whether predict leaf index instead. If set to TRUE, the output will be a matrix object.}
|
||||
|
||||
\item{...}{Parameters pass to \code{predict.xgb.Booster}}
|
||||
}
|
||||
\description{
|
||||
Predicted values based on either xgboost model or model handle object.
|
||||
}
|
||||
\details{
|
||||
The option \code{ntreelimit} purpose is to let the user train a model with lots
|
||||
of trees but use only the first trees for prediction to avoid overfitting
|
||||
(without having to train a new model with less trees).
|
||||
|
||||
The option \code{predleaf} purpose is inspired from §3.1 of the paper
|
||||
\code{Practical Lessons from Predicting Clicks on Ads at Facebook}.
|
||||
The idea is to use the model as a generator of new features which capture non linear link
|
||||
from original features.
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
data(agaricus.test, package='xgboost')
|
||||
train <- agaricus.train
|
||||
test <- agaricus.test
|
||||
|
||||
bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
|
||||
eta = 1, nthread = 2, nround = 2,objective = "binary:logistic")
|
||||
pred <- predict(bst, test$data)
|
||||
}
|
||||
|
||||
18
R-package/man/print.xgb.DMatrix.Rd
Normal file
18
R-package/man/print.xgb.DMatrix.Rd
Normal file
@ -0,0 +1,18 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.DMatrix.R
|
||||
\name{print.xgb.DMatrix}
|
||||
\alias{print.xgb.DMatrix}
|
||||
\title{Print xgb.DMatrix}
|
||||
\usage{
|
||||
print.xgb.DMatrix(x, verbose = FALSE, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{x}{an xgb.DMatrix object}
|
||||
|
||||
\item{verbose}{whether to print colnames (when present)}
|
||||
}
|
||||
\description{
|
||||
Print information about an xgb.DMatrix.
|
||||
Currently is would display
|
||||
}
|
||||
|
||||
@ -1,14 +1,13 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/setinfo.xgb.DMatrix.R
|
||||
\docType{methods}
|
||||
% Please edit documentation in R/xgb.DMatrix.R
|
||||
\name{setinfo}
|
||||
\alias{setinfo}
|
||||
\alias{setinfo,xgb.DMatrix-method}
|
||||
\alias{setinfo.xgb.DMatrix}
|
||||
\title{Set information of an xgb.DMatrix object}
|
||||
\usage{
|
||||
setinfo(object, ...)
|
||||
|
||||
\S4method{setinfo}{xgb.DMatrix}(object, name, info)
|
||||
\method{setinfo}{xgb.DMatrix}(object, name, info)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.DMatrix"}
|
||||
@ -23,7 +22,7 @@ setinfo(object, ...)
|
||||
Set information of an xgb.DMatrix object
|
||||
}
|
||||
\details{
|
||||
It can be one of the following:
|
||||
The \code{name} field can be one of the following:
|
||||
|
||||
\itemize{
|
||||
\item \code{label}: label Xgboost learn from ;
|
||||
@ -36,9 +35,10 @@ It can be one of the following:
|
||||
data(agaricus.train, package='xgboost')
|
||||
train <- agaricus.train
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
|
||||
labels <- getinfo(dtrain, 'label')
|
||||
setinfo(dtrain, 'label', 1-labels)
|
||||
labels2 <- getinfo(dtrain, 'label')
|
||||
stopifnot(all(labels2 == 1-labels))
|
||||
stopifnot(all.equal(labels2, 1-labels))
|
||||
}
|
||||
|
||||
|
||||
41
R-package/man/slice.xgb.DMatrix.Rd
Normal file
41
R-package/man/slice.xgb.DMatrix.Rd
Normal file
@ -0,0 +1,41 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.DMatrix.R
|
||||
\name{slice}
|
||||
\alias{[.xgb.DMatrix}
|
||||
\alias{slice}
|
||||
\alias{slice.xgb.DMatrix}
|
||||
\title{Get a new DMatrix containing the specified rows of
|
||||
orginal xgb.DMatrix object}
|
||||
\usage{
|
||||
slice(object, ...)
|
||||
|
||||
\method{slice}{xgb.DMatrix}(object, idxset, ...)
|
||||
|
||||
\method{[}{xgb.DMatrix}(object, idxset, colset = NULL)
|
||||
}
|
||||
\arguments{
|
||||
\item{object}{Object of class "xgb.DMatrix"}
|
||||
|
||||
\item{...}{other parameters (currently not used)}
|
||||
|
||||
\item{idxset}{a integer vector of indices of rows needed}
|
||||
|
||||
\item{colset}{currently not used (columns subsetting is not available)}
|
||||
}
|
||||
\description{
|
||||
Get a new DMatrix containing the specified rows of
|
||||
orginal xgb.DMatrix object
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
train <- agaricus.train
|
||||
dtrain <- xgb.DMatrix(train$data, label=train$label)
|
||||
|
||||
dsub <- slice(dtrain, 1:42)
|
||||
labels1 <- getinfo(dsub, 'label')
|
||||
dsub <- dtrain[1:42, ]
|
||||
labels2 <- getinfo(dsub, 'label')
|
||||
all.equal(labels1, labels2)
|
||||
|
||||
}
|
||||
|
||||
@ -7,8 +7,7 @@
|
||||
xgb.DMatrix(data, info = list(), missing = NA, ...)
|
||||
}
|
||||
\arguments{
|
||||
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character
|
||||
indicating the data file.}
|
||||
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
|
||||
|
||||
\item{info}{a list of information of the xgb.DMatrix object}
|
||||
|
||||
@ -18,7 +17,8 @@ value that represents missing value. Sometime a data use 0 or other extreme valu
|
||||
\item{...}{other information to pass to \code{info}.}
|
||||
}
|
||||
\description{
|
||||
Contruct xgb.DMatrix object from dense matrix, sparse matrix or local file.
|
||||
Contruct xgb.DMatrix object from dense matrix, sparse matrix
|
||||
or local file (that is a previously saved \code{xgb.DMatrix}).
|
||||
}
|
||||
\examples{
|
||||
data(agaricus.train, package='xgboost')
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user