diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 3cd80d5c2..dd788ca2f 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -1,6 +1,16 @@ # Generated by roxygen2: do not edit by hand +S3method("[",xgb.DMatrix) +S3method("dimnames<-",xgb.DMatrix) +S3method(dim,xgb.DMatrix) +S3method(dimnames,xgb.DMatrix) +S3method(getinfo,xgb.DMatrix) +S3method(predict,xgb.Booster) +S3method(predict,xgb.Booster.handle) +S3method(setinfo,xgb.DMatrix) +S3method(slice,xgb.DMatrix) export(getinfo) +export(print.xgb.DMatrix) export(setinfo) export(slice) export(xgb.DMatrix) @@ -19,8 +29,6 @@ export(xgb.save) export(xgb.save.raw) export(xgb.train) export(xgboost) -exportMethods(nrow) -exportMethods(predict) import(methods) importClassesFrom(Matrix,dgCMatrix) importClassesFrom(Matrix,dgeMatrix) diff --git a/R-package/R/getinfo.xgb.DMatrix.R b/R-package/R/getinfo.xgb.DMatrix.R deleted file mode 100644 index 3000a1e7d..000000000 --- a/R-package/R/getinfo.xgb.DMatrix.R +++ /dev/null @@ -1,55 +0,0 @@ -setClass('xgb.DMatrix') - -#' Get information of an xgb.DMatrix object -#' -#' Get information of an xgb.DMatrix object -#' -#' The information can be one of the following: -#' -#' \itemize{ -#' \item \code{label}: label Xgboost learn from ; -#' \item \code{weight}: to do a weight rescale ; -#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ; -#' \item \code{nrow}: number of rows of the \code{xgb.DMatrix}. -#' } -#' -#' @examples -#' data(agaricus.train, package='xgboost') -#' train <- agaricus.train -#' dtrain <- xgb.DMatrix(train$data, label=train$label) -#' labels <- getinfo(dtrain, 'label') -#' setinfo(dtrain, 'label', 1-labels) -#' labels2 <- getinfo(dtrain, 'label') -#' stopifnot(all(labels2 == 1-labels)) -#' @rdname getinfo -#' @export -getinfo <- function(object, ...){ - UseMethod("getinfo") -} - - - -#' @param object Object of class \code{xgb.DMatrix} -#' @param name the name of the field to get -#' @param ... other parameters -#' @rdname getinfo -#' @method getinfo xgb.DMatrix -setMethod("getinfo", signature = "xgb.DMatrix", - definition = function(object, name) { - if (typeof(name) != "character") { - stop("xgb.getinfo: name must be character") - } - if (class(object) != "xgb.DMatrix") { - stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix") - } - if (name != "label" && name != "weight" && - name != "base_margin" && name != "nrow") { - stop(paste("xgb.getinfo: unknown info name", name)) - } - if (name != "nrow"){ - ret <- .Call("XGDMatrixGetInfo_R", object, name, PACKAGE = "xgboost") - } else { - ret <- xgb.numrow(object) - } - return(ret) - }) diff --git a/R-package/R/nrow.xgb.DMatrix.R b/R-package/R/nrow.xgb.DMatrix.R deleted file mode 100644 index 9ea039764..000000000 --- a/R-package/R/nrow.xgb.DMatrix.R +++ /dev/null @@ -1,19 +0,0 @@ -setGeneric("nrow") - -#' @title Number of xgb.DMatrix rows -#' @description \code{nrow} return the number of rows present in the \code{xgb.DMatrix}. -#' @param x Object of class \code{xgb.DMatrix} -#' -#' @examples -#' data(agaricus.train, package='xgboost') -#' train <- agaricus.train -#' dtrain <- xgb.DMatrix(train$data, label=train$label) -#' stopifnot(nrow(dtrain) == nrow(train$data)) -#' -#' @export -setMethod("nrow", - signature = "xgb.DMatrix", - definition = function(x) { - xgb.numrow(x) - } -) diff --git a/R-package/R/predict.xgb.Booster.handle.R b/R-package/R/predict.xgb.Booster.handle.R deleted file mode 100644 index 3e4013b75..000000000 --- a/R-package/R/predict.xgb.Booster.handle.R +++ /dev/null @@ -1,18 +0,0 @@ -#' Predict method for eXtreme Gradient Boosting model handle -#' -#' Predicted values based on xgb.Booster.handle object. -#' -#' @param object Object of class "xgb.Boost.handle" -#' @param ... Parameters pass to \code{predict.xgb.Booster} -#' -setMethod("predict", signature = "xgb.Booster.handle", - definition = function(object, ...) { - if (class(object) != "xgb.Booster.handle"){ - stop("predict: model in prediction must be of class xgb.Booster.handle") - } - - bst <- xgb.handleToBooster(object) - - ret <- predict(bst, ...) - return(ret) -}) diff --git a/R-package/R/setinfo.xgb.DMatrix.R b/R-package/R/setinfo.xgb.DMatrix.R deleted file mode 100644 index 427de08d4..000000000 --- a/R-package/R/setinfo.xgb.DMatrix.R +++ /dev/null @@ -1,37 +0,0 @@ -#' Set information of an xgb.DMatrix object -#' -#' Set information of an xgb.DMatrix object -#' -#' It can be one of the following: -#' -#' \itemize{ -#' \item \code{label}: label Xgboost learn from ; -#' \item \code{weight}: to do a weight rescale ; -#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ; -#' \item \code{group}. -#' } -#' -#' @examples -#' data(agaricus.train, package='xgboost') -#' train <- agaricus.train -#' dtrain <- xgb.DMatrix(train$data, label=train$label) -#' labels <- getinfo(dtrain, 'label') -#' setinfo(dtrain, 'label', 1-labels) -#' labels2 <- getinfo(dtrain, 'label') -#' stopifnot(all(labels2 == 1-labels)) -#' @rdname setinfo -#' @export -setinfo <- function(object, ...){ - UseMethod("setinfo") -} - -#' @param object Object of class "xgb.DMatrix" -#' @param name the name of the field to get -#' @param info the specific field of information to set -#' @param ... other parameters -#' @rdname setinfo -#' @method setinfo xgb.DMatrix -setMethod("setinfo", signature = "xgb.DMatrix", - definition = function(object, name, info) { - xgb.setinfo(object, name, info) - }) diff --git a/R-package/R/slice.xgb.DMatrix.R b/R-package/R/slice.xgb.DMatrix.R deleted file mode 100644 index 4626c2b4d..000000000 --- a/R-package/R/slice.xgb.DMatrix.R +++ /dev/null @@ -1,44 +0,0 @@ -setClass('xgb.DMatrix') - -#' Get a new DMatrix containing the specified rows of -#' orginal xgb.DMatrix object -#' -#' Get a new DMatrix containing the specified rows of -#' orginal xgb.DMatrix object -#' -#' @examples -#' data(agaricus.train, package='xgboost') -#' train <- agaricus.train -#' dtrain <- xgb.DMatrix(train$data, label=train$label) -#' dsub <- slice(dtrain, 1:3) -#' @rdname slice -#' @export -slice <- function(object, ...){ - UseMethod("slice") -} - -#' @param object Object of class "xgb.DMatrix" -#' @param idxset a integer vector of indices of rows needed -#' @param ... other parameters -#' @rdname slice -#' @method slice xgb.DMatrix -setMethod("slice", signature = "xgb.DMatrix", - definition = function(object, idxset, ...) { - if (class(object) != "xgb.DMatrix") { - stop("slice: first argument dtrain must be xgb.DMatrix") - } - ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, - PACKAGE = "xgboost") - - attr_list <- attributes(object) - nr <- xgb.numrow(object) - len <- sapply(attr_list,length) - ind <- which(len == nr) - if (length(ind) > 0) { - nms <- names(attr_list)[ind] - for (i in 1:length(ind)) { - attr(ret,nms[i]) <- attr(object,nms[i])[idxset] - } - } - return(structure(ret, class = "xgb.DMatrix")) - }) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 926e82994..514826b3c 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -1,4 +1,4 @@ - #' @importClassesFrom Matrix dgCMatrix dgeMatrix +#' @importClassesFrom Matrix dgCMatrix dgeMatrix #' @import methods # depends on matrix @@ -9,131 +9,10 @@ library.dynam.unload("xgboost", libpath) } -# set information into dmatrix, this mutate dmatrix -xgb.setinfo <- function(dmat, name, info) { - if (class(dmat) != "xgb.DMatrix") { - stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix") - } - if (name == "label") { - if (length(info) != xgb.numrow(dmat)) - stop("The length of labels must equal to the number of rows in the input data") - .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), - PACKAGE = "xgboost") - return(TRUE) - } - if (name == "weight") { - if (length(info) != xgb.numrow(dmat)) - stop("The length of weights must equal to the number of rows in the input data") - .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), - PACKAGE = "xgboost") - return(TRUE) - } - if (name == "base_margin") { - # if (length(info)!=xgb.numrow(dmat)) - # stop("The length of base margin must equal to the number of rows in the input data") - .Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info), - PACKAGE = "xgboost") - return(TRUE) - } - if (name == "group") { - if (sum(info) != xgb.numrow(dmat)) - stop("The sum of groups must equal to the number of rows in the input data") - .Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info), - PACKAGE = "xgboost") - return(TRUE) - } - stop(paste("xgb.setinfo: unknown info name", name)) - return(FALSE) -} -# construct a Booster from cachelist -xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) { - if (typeof(cachelist) != "list") { - stop("xgb.Booster: only accepts list of DMatrix as cachelist") - } - for (dm in cachelist) { - if (class(dm) != "xgb.DMatrix") { - stop("xgb.Booster: only accepts list of DMatrix as cachelist") - } - } - handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost") - if (length(params) != 0) { - for (i in 1:length(params)) { - p <- params[i] - .Call("XGBoosterSetParam_R", handle, gsub("\\.", "_", names(p)), as.character(p), - PACKAGE = "xgboost") - } - } - if (!is.null(modelfile)) { - if (typeof(modelfile) == "character") { - .Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost") - } else if (typeof(modelfile) == "raw") { - .Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost") - } else { - stop("xgb.Booster: modelfile must be character or raw vector") - } - } - return(structure(handle, class = "xgb.Booster.handle")) -} - -# convert xgb.Booster.handle to xgb.Booster -xgb.handleToBooster <- function(handle, raw = NULL) -{ - bst <- list(handle = handle, raw = raw) - class(bst) <- "xgb.Booster" - return(bst) -} - -# Check whether an xgb.Booster object is complete -xgb.Booster.check <- function(bst, saveraw = TRUE) -{ - isnull <- is.null(bst$handle) - if (!isnull) { - isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost") - } - if (isnull) { - bst$handle <- xgb.Booster(modelfile = bst$raw) - } else { - if (is.null(bst$raw) && saveraw) - bst$raw <- xgb.save.raw(bst$handle) - } - return(bst) -} - -## ----the following are low level iteratively function, not needed if +## ----the following are low level iterative functions, not needed if ## you do not want to use them --------------------------------------- -# get dmatrix from data, label -xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) { - inClass <- class(data) - if (inClass == "dgCMatrix" || inClass == "matrix") { - if (is.null(label)) { - stop("xgboost: need label when data is a matrix") - } - dtrain <- xgb.DMatrix(data, label = label, missing = missing) - if (!is.null(weight)){ - xgb.setinfo(dtrain, "weight", weight) - } - } else { - if (!is.null(label)) { - warning("xgboost: label will be ignored.") - } - if (inClass == "character") { - dtrain <- xgb.DMatrix(data) - } else if (inClass == "xgb.DMatrix") { - dtrain <- data - } else if (inClass == "data.frame") { - stop("xgboost only support numerical matrix input, - use 'data.matrix' to transform the data.") - } else { - stop("xgboost: Invalid input of data") - } - } - return (dtrain) -} -xgb.numrow <- function(dmat) { - nrow <- .Call("XGDMatrixNumRow_R", dmat, PACKAGE="xgboost") - return(nrow) -} + # iteratively update booster with customized statistics xgb.iter.boost <- function(booster, dtrain, gpair) { if (class(booster) != "xgb.Booster.handle") { @@ -227,7 +106,7 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) { "\tConsider providing pre-computed CV-folds through the folds parameter.") } y <- getinfo(dall, 'label') - randidx <- sample(1 : xgb.numrow(dall)) + randidx <- sample(1 : nrow(dall)) if (stratified & length(y) == length(randidx)) { y <- y[randidx] # diff --git a/R-package/R/predict.xgb.Booster.R b/R-package/R/xgb.Booster.R similarity index 54% rename from R-package/R/predict.xgb.Booster.R rename to R-package/R/xgb.Booster.R index d608f3465..97f5ea37f 100644 --- a/R-package/R/predict.xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -1,13 +1,65 @@ -setClass("xgb.Booster.handle") -setClass("xgb.Booster", - slots = c(handle = "xgb.Booster.handle", - raw = "raw")) +# Construct a Booster from cachelist +# internal utility function +xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) { + if (typeof(cachelist) != "list") { + stop("xgb.Booster: only accepts list of DMatrix as cachelist") + } + for (dm in cachelist) { + if (class(dm) != "xgb.DMatrix") { + stop("xgb.Booster: only accepts list of DMatrix as cachelist") + } + } + handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost") + if (length(params) != 0) { + for (i in 1:length(params)) { + p <- params[i] + .Call("XGBoosterSetParam_R", handle, gsub("\\.", "_", names(p)), as.character(p), + PACKAGE = "xgboost") + } + } + if (!is.null(modelfile)) { + if (typeof(modelfile) == "character") { + .Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost") + } else if (typeof(modelfile) == "raw") { + .Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost") + } else { + stop("xgb.Booster: modelfile must be character or raw vector") + } + } + return(structure(handle, class = "xgb.Booster.handle")) +} + +# Convert xgb.Booster.handle to xgb.Booster +# internal utility function +xgb.handleToBooster <- function(handle, raw = NULL) +{ + bst <- list(handle = handle, raw = raw) + class(bst) <- "xgb.Booster" + return(bst) +} + +# Check whether an xgb.Booster object is complete +# internal utility function +xgb.Booster.check <- function(bst, saveraw = TRUE) +{ + isnull <- is.null(bst$handle) + if (!isnull) { + isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost") + } + if (isnull) { + bst$handle <- xgb.Booster(modelfile = bst$raw) + } else { + if (is.null(bst$raw) && saveraw) + bst$raw <- xgb.save.raw(bst$handle) + } + return(bst) +} #' Predict method for eXtreme Gradient Boosting model #' -#' Predicted values based on xgboost model object. +#' Predicted values based on either xgboost model or model handle object. #' -#' @param object Object of class "xgb.Boost" +#' @param object Object of class \code{xgb.Booster} or \code{xgb.Booster.handle} #' @param newdata takes \code{matrix}, \code{dgCMatrix}, local data file or #' \code{xgb.DMatrix}. #' @param missing Missing is only used when input is dense matrix, pick a float @@ -20,6 +72,7 @@ setClass("xgb.Booster", #' only valid for gbtree, but not for gblinear. set it to be value bigger #' than 0. It will use all trees by default. #' @param predleaf whether predict leaf index instead. If set to TRUE, the output will be a matrix object. +#' @param ... Parameters pass to \code{predict.xgb.Booster} #' #' @details #' The option \code{ntreelimit} purpose is to let the user train a model with lots @@ -36,13 +89,14 @@ setClass("xgb.Booster", #' data(agaricus.test, package='xgboost') #' train <- agaricus.train #' test <- agaricus.test +#' #' bst <- xgboost(data = train$data, label = train$label, max.depth = 2, #' eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") #' pred <- predict(bst, test$data) +#' @rdname predict.xgb.Booster #' @export -setMethod("predict", signature = "xgb.Booster", - definition = function(object, newdata, missing = NA, - outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) { +predict.xgb.Booster <- function(object, newdata, missing = NA, + outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) { if (class(object) != "xgb.Booster"){ stop("predict: model in prediction must be of class xgb.Booster") } else { @@ -77,4 +131,14 @@ setMethod("predict", signature = "xgb.Booster", } } return(ret) -}) +} + +#' @rdname predict.xgb.Booster +#' @export +predict.xgb.Booster.handle <- function(object, ...) { + + bst <- xgb.handleToBooster(object) + + ret <- predict(bst, ...) + return(ret) +} diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index c34c65d95..c5f4fed72 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -1,9 +1,9 @@ #' Contruct xgb.DMatrix object #' -#' Contruct xgb.DMatrix object from dense matrix, sparse matrix or local file. +#' Contruct xgb.DMatrix object from dense matrix, sparse matrix +#' or local file (that was created previously by saving an \code{xgb.DMatrix}). #' -#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character -#' indicating the data file. +#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename #' @param info a list of information of the xgb.DMatrix object #' @param missing Missing is only used when input is dense matrix, pick a float #' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values. @@ -18,27 +18,348 @@ #' dtrain <- xgb.DMatrix('xgb.DMatrix.data') #' @export xgb.DMatrix <- function(data, info = list(), missing = NA, ...) { + cnames <- NULL if (typeof(data) == "character") { handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), PACKAGE = "xgboost") } else if (is.matrix(data)) { handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE = "xgboost") + cnames <- colnames(data) } else if (class(data) == "dgCMatrix") { handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, PACKAGE = "xgboost") + cnames <- colnames(data) } else { stop(paste("xgb.DMatrix: does not support to construct from ", typeof(data))) } - dmat <- structure(handle, class = "xgb.DMatrix") + dmat <- handle + attributes(dmat) <- list(.Dimnames = list(NULL, cnames), class = "xgb.DMatrix") + #dmat <- list(handle = handle, colnames = cnames) + #attr(dmat, 'class') <- "xgb.DMatrix" info <- append(info, list(...)) if (length(info) == 0) return(dmat) for (i in 1:length(info)) { p <- info[i] - xgb.setinfo(dmat, names(p), p[[1]]) + setinfo(dmat, names(p), p[[1]]) } return(dmat) } + + +# get dmatrix from data, label +# internal helper method +xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) { + inClass <- class(data) + if (inClass == "dgCMatrix" || inClass == "matrix") { + if (is.null(label)) { + stop("xgboost: need label when data is a matrix") + } + dtrain <- xgb.DMatrix(data, label = label, missing = missing) + if (!is.null(weight)){ + setinfo(dtrain, "weight", weight) + } + } else { + if (!is.null(label)) { + warning("xgboost: label will be ignored.") + } + if (inClass == "character") { + dtrain <- xgb.DMatrix(data) + } else if (inClass == "xgb.DMatrix") { + dtrain <- data + } else if (inClass == "data.frame") { + stop("xgboost only support numerical matrix input, + use 'data.matrix' to transform the data.") + } else { + stop("xgboost: Invalid input of data") + } + } + return (dtrain) +} + + +#' Dimensions of xgb.DMatrix +#' +#' Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}. +#' @param x Object of class \code{xgb.DMatrix} +#' +#' @details +#' Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also +#' be directly used with an \code{xgb.DMatrix} object. +#' +#' @examples +#' data(agaricus.train, package='xgboost') +#' train <- agaricus.train +#' dtrain <- xgb.DMatrix(train$data, label=train$label) +#' +#' stopifnot(nrow(dtrain) == nrow(train$data)) +#' stopifnot(ncol(dtrain) == ncol(train$data)) +#' stopifnot(all(dim(dtrain) == dim(train$data))) +#' +#' @export +dim.xgb.DMatrix <- function(x) { + c(.Call("XGDMatrixNumRow_R", x, PACKAGE="xgboost"), + .Call("XGDMatrixNumCol_R", x, PACKAGE="xgboost")) +} + + +#' Handling of column names of \code{xgb.DMatrix} +#' +#' Only column names are supported for \code{xgb.DMatrix}, thus setting of +#' row names would have no effect and returnten row names would be NULL. +#' +#' @param x object of class \code{xgb.DMatrix} +#' @param value a list of two elements: the first one is ignored +#' and the second one is column names +#' +#' @details +#' Generic \code{dimnames} methods are used by \code{colnames}. +#' Since row names are irrelevant, it is recommended to use \code{colnames} directly. +#' +#' @examples +#' data(agaricus.train, package='xgboost') +#' train <- agaricus.train +#' dtrain <- xgb.DMatrix(train$data, label=train$label) +#' dimnames(dtrain) +#' colnames(dtrain) +#' colnames(dtrain) <- make.names(1:ncol(train$data)) +#' print(dtrain, verbose=TRUE) +#' +#' @rdname dimnames.xgb.DMatrix +#' @export +dimnames.xgb.DMatrix <- function(x) { + attr(x, '.Dimnames') +} + +#' @rdname dimnames.xgb.DMatrix +#' @export +`dimnames<-.xgb.DMatrix` <- function(x, value) { + if (!is.list(value) || length(value) != 2L) + stop("invalid 'dimnames' given: must be a list of two elements") + if (!is.null(value[[1L]])) + stop("xgb.DMatrix does not have rownames") + if (is.null(value[[2]])) { + attr(x, '.Dimnames') <- NULL + return(x) + } + if (ncol(x) != length(value[[2]])) + stop("can't assign ", length(value[[2]]), " colnames to a ", + ncol(x), " column xgb.DMatrix") + attr(x, '.Dimnames') <- value + x +} + + +#' Get information of an xgb.DMatrix object +#' +#' Get information of an xgb.DMatrix object +#' @param object Object of class \code{xgb.DMatrix} +#' @param name the name of the information field to get (see details) +#' @param ... other parameters +#' +#' @details +#' The \code{name} field can be one of the following: +#' +#' \itemize{ +#' \item \code{label}: label Xgboost learn from ; +#' \item \code{weight}: to do a weight rescale ; +#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ; +#' \item \code{nrow}: number of rows of the \code{xgb.DMatrix}. +#' } +#' +#' @examples +#' data(agaricus.train, package='xgboost') +#' train <- agaricus.train +#' dtrain <- xgb.DMatrix(train$data, label=train$label) +#' +#' labels <- getinfo(dtrain, 'label') +#' setinfo(dtrain, 'label', 1-labels) +#' +#' labels2 <- getinfo(dtrain, 'label') +#' stopifnot(all(labels2 == 1-labels)) +#' @rdname getinfo +#' @export +getinfo <- function(object, ...) UseMethod("getinfo") + +#' @rdname getinfo +#' @export +getinfo.xgb.DMatrix <- function(object, name) { + if (typeof(name) != "character") { + stop("getinfo: name must be character") + } + if (name != "label" && name != "weight" && + name != "base_margin" && name != "nrow") { + stop(paste("getinfo: unknown info name", name)) + } + if (name != "nrow"){ + ret <- .Call("XGDMatrixGetInfo_R", object, name, PACKAGE = "xgboost") + } else { + ret <- nrow(object) + } + return(ret) +} + + +#' Set information of an xgb.DMatrix object +#' +#' Set information of an xgb.DMatrix object +#' +#' @param object Object of class "xgb.DMatrix" +#' @param name the name of the field to get +#' @param info the specific field of information to set +#' @param ... other parameters +#' +#' @details +#' The \code{name} field can be one of the following: +#' +#' \itemize{ +#' \item \code{label}: label Xgboost learn from ; +#' \item \code{weight}: to do a weight rescale ; +#' \item \code{base_margin}: base margin is the base prediction Xgboost will boost from ; +#' \item \code{group}. +#' } +#' +#' @examples +#' data(agaricus.train, package='xgboost') +#' train <- agaricus.train +#' dtrain <- xgb.DMatrix(train$data, label=train$label) +#' +#' labels <- getinfo(dtrain, 'label') +#' setinfo(dtrain, 'label', 1-labels) +#' labels2 <- getinfo(dtrain, 'label') +#' stopifnot(all.equal(labels2, 1-labels)) +#' @rdname setinfo +#' @export +setinfo <- function(object, ...) UseMethod("setinfo") + +#' @rdname setinfo +#' @export +setinfo.xgb.DMatrix <- function(object, name, info) { + if (name == "label") { + if (length(info) != nrow(object)) + stop("The length of labels must equal to the number of rows in the input data") + .Call("XGDMatrixSetInfo_R", object, name, as.numeric(info), + PACKAGE = "xgboost") + return(TRUE) + } + if (name == "weight") { + if (length(info) != nrow(object)) + stop("The length of weights must equal to the number of rows in the input data") + .Call("XGDMatrixSetInfo_R", object, name, as.numeric(info), + PACKAGE = "xgboost") + return(TRUE) + } + if (name == "base_margin") { + # if (length(info)!=nrow(object)) + # stop("The length of base margin must equal to the number of rows in the input data") + .Call("XGDMatrixSetInfo_R", object, name, as.numeric(info), + PACKAGE = "xgboost") + return(TRUE) + } + if (name == "group") { + if (sum(info) != nrow(object)) + stop("The sum of groups must equal to the number of rows in the input data") + .Call("XGDMatrixSetInfo_R", object, name, as.integer(info), + PACKAGE = "xgboost") + return(TRUE) + } + stop(paste("setinfo: unknown info name", name)) + return(FALSE) +} + + +#' Get a new DMatrix containing the specified rows of +#' orginal xgb.DMatrix object +#' +#' Get a new DMatrix containing the specified rows of +#' orginal xgb.DMatrix object +#' +#' @param object Object of class "xgb.DMatrix" +#' @param idxset a integer vector of indices of rows needed +#' @param colset currently not used (columns subsetting is not available) +#' @param ... other parameters (currently not used) +#' +#' @examples +#' data(agaricus.train, package='xgboost') +#' train <- agaricus.train +#' dtrain <- xgb.DMatrix(train$data, label=train$label) +#' +#' dsub <- slice(dtrain, 1:42) +#' labels1 <- getinfo(dsub, 'label') +#' dsub <- dtrain[1:42, ] +#' labels2 <- getinfo(dsub, 'label') +#' all.equal(labels1, labels2) +#' +#' @rdname slice.xgb.DMatrix +#' @export +slice <- function(object, ...) UseMethod("slice") + +#' @rdname slice.xgb.DMatrix +#' @export +slice.xgb.DMatrix <- function(object, idxset, ...) { + if (class(object) != "xgb.DMatrix") { + stop("slice: first argument dtrain must be xgb.DMatrix") + } + ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, PACKAGE = "xgboost") + + attr_list <- attributes(object) + nr <- nrow(object) + len <- sapply(attr_list, length) + ind <- which(len == nr) + if (length(ind) > 0) { + nms <- names(attr_list)[ind] + for (i in 1:length(ind)) { + attr(ret, nms[i]) <- attr(object, nms[i])[idxset] + } + } + return(structure(ret, class = "xgb.DMatrix")) +} + +#' @rdname slice.xgb.DMatrix +#' @export +`[.xgb.DMatrix` <- function(object, idxset, colset=NULL) { + slice(object, idxset) +} + + +#' Print xgb.DMatrix +#' +#' Print information about xgb.DMatrix. +#' Currently it displays dimensions and presence of info-fields and colnames. +#' +#' @param x an xgb.DMatrix object +#' @param verbose whether to print colnames (when present) +#' @param ... not currently used +#' +#' @examples +#' data(agaricus.train, package='xgboost') +#' train <- agaricus.train +#' dtrain <- xgb.DMatrix(train$data, label=train$label) +#' +#' dtrain +#' print(dtrain, verbose=TRUE) +#' @export +print.xgb.DMatrix <- function(x, verbose=FALSE, ...) { + cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ') + infos <- c() + if(length(getinfo(x, 'label')) > 0) infos <- 'label' + if(length(getinfo(x, 'weight')) > 0) infos <- c(infos, 'weight') + if(length(getinfo(x, 'base_margin')) > 0) infos <- c(infos, 'base_margin') + if (length(infos) == 0) infos <- 'NA' + cat(infos) + cnames <- colnames(x) + cat(' colnames:') + if (verbose & !is.null(cnames)) { + cat("\n'") + cat(cnames, sep="','") + cat("'") + } else { + if (is.null(cnames)) cat(' no') + else cat(' yes') + } + cat("\n") + invisible(x) +} diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 7ad2e5c41..e3faf33a0 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -169,11 +169,11 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = num_class <- params[['num_class']] if (is.null(num_class)) stop('must set num_class to use softmax') - predictValues <- matrix(0,xgb.numrow(dtrain),num_class) + predictValues <- matrix(0, nrow(dtrain), num_class) mat_pred <- TRUE } else - predictValues <- rep(0,xgb.numrow(dtrain)) + predictValues <- rep(0, nrow(dtrain)) history <- c() print.every.n <- max(as.integer(print.every.n), 1L) for (i in 1:nrounds) { diff --git a/R-package/man/dim.xgb.DMatrix.Rd b/R-package/man/dim.xgb.DMatrix.Rd new file mode 100644 index 000000000..168782dec --- /dev/null +++ b/R-package/man/dim.xgb.DMatrix.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.DMatrix.R +\name{dim.xgb.DMatrix} +\alias{dim.xgb.DMatrix} +\title{Dimensions of xgb.DMatrix} +\usage{ +\method{dim}{xgb.DMatrix}(x) +} +\arguments{ +\item{x}{Object of class \code{xgb.DMatrix}} +} +\description{ +Returns a vector of numbers of rows and of columns in an \code{xgb.DMatrix}. +} +\details{ +Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also +be directly used with an \code{xgb.DMatrix} object. +} +\examples{ +data(agaricus.train, package='xgboost') +train <- agaricus.train +dtrain <- xgb.DMatrix(train$data, label=train$label) + +stopifnot(nrow(dtrain) == nrow(train$data)) +stopifnot(ncol(dtrain) == ncol(train$data)) +stopifnot(all(dim(dtrain) == dim(train$data))) + +} + diff --git a/R-package/man/dimnames.xgb.DMatrix.Rd b/R-package/man/dimnames.xgb.DMatrix.Rd new file mode 100644 index 000000000..0877f294b --- /dev/null +++ b/R-package/man/dimnames.xgb.DMatrix.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.DMatrix.R +\name{dimnames.xgb.DMatrix} +\alias{dimnames.xgb.DMatrix} +\alias{dimnames<-.xgb.DMatrix} +\title{Handling of column names of \code{xgb.DMatrix}} +\usage{ +\method{dimnames}{xgb.DMatrix}(x) + +\method{dimnames}{xgb.DMatrix}(x) <- value +} +\arguments{ +\item{x}{object of class \code{xgb.DMatrix}} + +\item{value}{a list of two elements: the first one is ignored +and the second one is column names} +} +\description{ +Only column names are supported for \code{xgb.DMatrix}, thus setting of +row names would have no effect and returnten row names would be NULL. +} +\details{ +Generic \code{dimnames} methods are used by \code{colnames}. +Since row names are irrelevant, it is recommended to use \code{colnames} directly. +} +\examples{ +data(agaricus.train, package='xgboost') +train <- agaricus.train +dtrain <- xgb.DMatrix(train$data, label=train$label) +dimnames(dtrain) +colnames(dtrain) +colnames(dtrain) <- make.names(1:ncol(train$data)) +print(dtrain, verbose=TRUE) + +} + diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index f8b4f6b99..108d7edf2 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -1,27 +1,26 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getinfo.xgb.DMatrix.R -\docType{methods} +% Please edit documentation in R/xgb.DMatrix.R \name{getinfo} \alias{getinfo} -\alias{getinfo,xgb.DMatrix-method} +\alias{getinfo.xgb.DMatrix} \title{Get information of an xgb.DMatrix object} \usage{ getinfo(object, ...) -\S4method{getinfo}{xgb.DMatrix}(object, name) +\method{getinfo}{xgb.DMatrix}(object, name) } \arguments{ \item{object}{Object of class \code{xgb.DMatrix}} \item{...}{other parameters} -\item{name}{the name of the field to get} +\item{name}{the name of the information field to get (see details)} } \description{ Get information of an xgb.DMatrix object } \details{ -The information can be one of the following: +The \code{name} field can be one of the following: \itemize{ \item \code{label}: label Xgboost learn from ; @@ -34,8 +33,10 @@ The information can be one of the following: data(agaricus.train, package='xgboost') train <- agaricus.train dtrain <- xgb.DMatrix(train$data, label=train$label) + labels <- getinfo(dtrain, 'label') setinfo(dtrain, 'label', 1-labels) + labels2 <- getinfo(dtrain, 'label') stopifnot(all(labels2 == 1-labels)) } diff --git a/R-package/man/nrow-xgb.DMatrix-method.Rd b/R-package/man/nrow-xgb.DMatrix-method.Rd deleted file mode 100644 index 1fd52b9c1..000000000 --- a/R-package/man/nrow-xgb.DMatrix-method.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/nrow.xgb.DMatrix.R -\docType{methods} -\name{nrow,xgb.DMatrix-method} -\alias{nrow,xgb.DMatrix-method} -\title{Number of xgb.DMatrix rows} -\usage{ -\S4method{nrow}{xgb.DMatrix}(x) -} -\arguments{ -\item{x}{Object of class \code{xgb.DMatrix}} -} -\description{ -\code{nrow} return the number of rows present in the \code{xgb.DMatrix}. -} -\examples{ -data(agaricus.train, package='xgboost') -train <- agaricus.train -dtrain <- xgb.DMatrix(train$data, label=train$label) -stopifnot(nrow(dtrain) == nrow(train$data)) - -} - diff --git a/R-package/man/predict-xgb.Booster.handle-method.Rd b/R-package/man/predict-xgb.Booster.handle-method.Rd deleted file mode 100644 index 34454e555..000000000 --- a/R-package/man/predict-xgb.Booster.handle-method.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/predict.xgb.Booster.handle.R -\docType{methods} -\name{predict,xgb.Booster.handle-method} -\alias{predict,xgb.Booster.handle-method} -\title{Predict method for eXtreme Gradient Boosting model handle} -\usage{ -\S4method{predict}{xgb.Booster.handle}(object, ...) -} -\arguments{ -\item{object}{Object of class "xgb.Boost.handle"} - -\item{...}{Parameters pass to \code{predict.xgb.Booster}} -} -\description{ -Predicted values based on xgb.Booster.handle object. -} - diff --git a/R-package/man/predict-xgb.Booster-method.Rd b/R-package/man/predict.xgb.Booster.Rd similarity index 80% rename from R-package/man/predict-xgb.Booster-method.Rd rename to R-package/man/predict.xgb.Booster.Rd index 341ced8c6..504037937 100644 --- a/R-package/man/predict-xgb.Booster-method.Rd +++ b/R-package/man/predict.xgb.Booster.Rd @@ -1,15 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/predict.xgb.Booster.R -\docType{methods} -\name{predict,xgb.Booster-method} -\alias{predict,xgb.Booster-method} +% Please edit documentation in R/xgb.Booster.R +\name{predict.xgb.Booster} +\alias{predict.xgb.Booster} +\alias{predict.xgb.Booster.handle} \title{Predict method for eXtreme Gradient Boosting model} \usage{ -\S4method{predict}{xgb.Booster}(object, newdata, missing = NA, +\method{predict}{xgb.Booster}(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) + +\method{predict}{xgb.Booster.handle}(object, ...) } \arguments{ -\item{object}{Object of class "xgb.Boost"} +\item{object}{Object of class \code{xgb.Booster} or \code{xgb.Booster.handle}} \item{newdata}{takes \code{matrix}, \code{dgCMatrix}, local data file or \code{xgb.DMatrix}.} @@ -27,9 +29,11 @@ only valid for gbtree, but not for gblinear. set it to be value bigger than 0. It will use all trees by default.} \item{predleaf}{whether predict leaf index instead. If set to TRUE, the output will be a matrix object.} + +\item{...}{Parameters pass to \code{predict.xgb.Booster}} } \description{ -Predicted values based on xgboost model object. +Predicted values based on either xgboost model or model handle object. } \details{ The option \code{ntreelimit} purpose is to let the user train a model with lots @@ -46,6 +50,7 @@ data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test + bst <- xgboost(data = train$data, label = train$label, max.depth = 2, eta = 1, nthread = 2, nround = 2,objective = "binary:logistic") pred <- predict(bst, test$data) diff --git a/R-package/man/print.xgb.DMatrix.Rd b/R-package/man/print.xgb.DMatrix.Rd new file mode 100644 index 000000000..95837b9f4 --- /dev/null +++ b/R-package/man/print.xgb.DMatrix.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.DMatrix.R +\name{print.xgb.DMatrix} +\alias{print.xgb.DMatrix} +\title{Print xgb.DMatrix} +\usage{ +print.xgb.DMatrix(x, verbose = FALSE, ...) +} +\arguments{ +\item{x}{an xgb.DMatrix object} + +\item{verbose}{whether to print colnames (when present)} + +\item{...}{not currently used} +} +\description{ +Print information about xgb.DMatrix. +Currently it displays dimensions and presence of info-fields and colnames. +} +\examples{ +data(agaricus.train, package='xgboost') +train <- agaricus.train +dtrain <- xgb.DMatrix(train$data, label=train$label) + +dtrain +print(dtrain, verbose=TRUE) +} + diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd index cb939721e..b182a9042 100644 --- a/R-package/man/setinfo.Rd +++ b/R-package/man/setinfo.Rd @@ -1,14 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/setinfo.xgb.DMatrix.R -\docType{methods} +% Please edit documentation in R/xgb.DMatrix.R \name{setinfo} \alias{setinfo} -\alias{setinfo,xgb.DMatrix-method} +\alias{setinfo.xgb.DMatrix} \title{Set information of an xgb.DMatrix object} \usage{ setinfo(object, ...) -\S4method{setinfo}{xgb.DMatrix}(object, name, info) +\method{setinfo}{xgb.DMatrix}(object, name, info) } \arguments{ \item{object}{Object of class "xgb.DMatrix"} @@ -23,7 +22,7 @@ setinfo(object, ...) Set information of an xgb.DMatrix object } \details{ -It can be one of the following: +The \code{name} field can be one of the following: \itemize{ \item \code{label}: label Xgboost learn from ; @@ -36,9 +35,10 @@ It can be one of the following: data(agaricus.train, package='xgboost') train <- agaricus.train dtrain <- xgb.DMatrix(train$data, label=train$label) + labels <- getinfo(dtrain, 'label') setinfo(dtrain, 'label', 1-labels) labels2 <- getinfo(dtrain, 'label') -stopifnot(all(labels2 == 1-labels)) +stopifnot(all.equal(labels2, 1-labels)) } diff --git a/R-package/man/slice.Rd b/R-package/man/slice.xgb.DMatrix.Rd similarity index 52% rename from R-package/man/slice.Rd rename to R-package/man/slice.xgb.DMatrix.Rd index b17722115..c352203fb 100644 --- a/R-package/man/slice.Rd +++ b/R-package/man/slice.xgb.DMatrix.Rd @@ -1,22 +1,26 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/slice.xgb.DMatrix.R -\docType{methods} +% Please edit documentation in R/xgb.DMatrix.R \name{slice} +\alias{[.xgb.DMatrix} \alias{slice} -\alias{slice,xgb.DMatrix-method} +\alias{slice.xgb.DMatrix} \title{Get a new DMatrix containing the specified rows of orginal xgb.DMatrix object} \usage{ slice(object, ...) -\S4method{slice}{xgb.DMatrix}(object, idxset, ...) +\method{slice}{xgb.DMatrix}(object, idxset, ...) + +\method{[}{xgb.DMatrix}(object, idxset, colset = NULL) } \arguments{ \item{object}{Object of class "xgb.DMatrix"} -\item{...}{other parameters} +\item{...}{other parameters (currently not used)} \item{idxset}{a integer vector of indices of rows needed} + +\item{colset}{currently not used (columns subsetting is not available)} } \description{ Get a new DMatrix containing the specified rows of @@ -26,6 +30,12 @@ orginal xgb.DMatrix object data(agaricus.train, package='xgboost') train <- agaricus.train dtrain <- xgb.DMatrix(train$data, label=train$label) -dsub <- slice(dtrain, 1:3) + +dsub <- slice(dtrain, 1:42) +labels1 <- getinfo(dsub, 'label') +dsub <- dtrain[1:42, ] +labels2 <- getinfo(dsub, 'label') +all.equal(labels1, labels2) + } diff --git a/R-package/man/xgb.DMatrix.Rd b/R-package/man/xgb.DMatrix.Rd index 2e892cc6d..cdb8fdf62 100644 --- a/R-package/man/xgb.DMatrix.Rd +++ b/R-package/man/xgb.DMatrix.Rd @@ -7,8 +7,7 @@ xgb.DMatrix(data, info = list(), missing = NA, ...) } \arguments{ -\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character -indicating the data file.} +\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename} \item{info}{a list of information of the xgb.DMatrix object} @@ -18,7 +17,8 @@ value that represents missing value. Sometime a data use 0 or other extreme valu \item{...}{other information to pass to \code{info}.} } \description{ -Contruct xgb.DMatrix object from dense matrix, sparse matrix or local file. +Contruct xgb.DMatrix object from dense matrix, sparse matrix +or local file (that was created previously by saving an \code{xgb.DMatrix}). } \examples{ data(agaricus.train, package='xgboost') diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index 665fb5faa..846ad9f47 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -195,6 +195,14 @@ SEXP XGDMatrixNumRow_R(SEXP handle) { return ScalarInteger(static_cast(nrow)); } +SEXP XGDMatrixNumCol_R(SEXP handle) { + bst_ulong ncol; + R_API_BEGIN(); + CHECK_CALL(XGDMatrixNumCol(R_ExternalPtrAddr(handle), &ncol)); + R_API_END(); + return ScalarInteger(static_cast(ncol)); +} + // functions related to booster void _BoosterFinalizer(SEXP ext) { if (R_ExternalPtrAddr(ext) == NULL) return; diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h index 123ebe028..6e1739464 100644 --- a/R-package/src/xgboost_R.h +++ b/R-package/src/xgboost_R.h @@ -83,10 +83,16 @@ XGB_DLL SEXP XGDMatrixGetInfo_R(SEXP handle, SEXP field); /*! * \brief return number of rows - * \param handle a instance of data matrix + * \param handle an instance of data matrix */ XGB_DLL SEXP XGDMatrixNumRow_R(SEXP handle); +/*! + * \brief return number of columns + * \param handle an instance of data matrix + */ +XGB_DLL SEXP XGDMatrixNumCol_R(SEXP handle); + /*! * \brief create xgboost learner * \param dmats a list of dmatrix handles that will be cached diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R new file mode 100644 index 000000000..baead5099 --- /dev/null +++ b/R-package/tests/testthat/test_dmatrix.R @@ -0,0 +1,67 @@ +require(xgboost) + +context("testing xgb.DMatrix functionality") + +data(agaricus.test, package='xgboost') +test_data <- agaricus.test$data[1:100,] +test_label <- agaricus.test$label[1:100] + +test_that("xgb.DMatrix: basic construction, saving, loading", { + # from sparse matrix + dtest1 <- xgb.DMatrix(test_data, label=test_label) + # from dense matrix + dtest2 <- xgb.DMatrix(as.matrix(test_data), label=test_label) + expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label')) + + # save to a local file + tmp_file <- tempfile('xgb.DMatrix_') + expect_true(xgb.DMatrix.save(dtest1, tmp_file)) + # read from a local file + dtest3 <- xgb.DMatrix(tmp_file) + unlink(tmp_file) + expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label')) +}) + +test_that("xgb.DMatrix: getinfo & setinfo", { + dtest <- xgb.DMatrix(test_data) + expect_true(setinfo(dtest, 'label', test_label)) + labels <- getinfo(dtest, 'label') + expect_equal(test_label, getinfo(dtest, 'label')) + + expect_true(length(getinfo(dtest, 'weight')) == 0) + expect_true(length(getinfo(dtest, 'base_margin')) == 0) + + expect_true(setinfo(dtest, 'weight', test_label)) + expect_true(setinfo(dtest, 'base_margin', test_label)) + expect_true(setinfo(dtest, 'group', c(50,50))) + expect_error(setinfo(dtest, 'group', test_label)) + + # providing character values will give a warning + expect_warning( setinfo(dtest, 'weight', rep('a', nrow(test_data))) ) + + # any other label should error + expect_error(setinfo(dtest, 'asdf', test_label)) +}) + +test_that("xgb.DMatrix: slice, dim", { + dtest <- xgb.DMatrix(test_data, label=test_label) + expect_equal(dim(dtest), dim(test_data)) + dsub1 <- slice(dtest, 1:42) + expect_equal(nrow(dsub1), 42) + expect_equal(ncol(dsub1), ncol(test_data)) + + dsub2 <- dtest[1:42,] + expect_equal(dim(dtest), dim(test_data)) + expect_equal(getinfo(dsub1, 'label'), getinfo(dsub2, 'label')) +}) + +test_that("xgb.DMatrix: colnames", { + dtest <- xgb.DMatrix(test_data, label=test_label) + expect_equal(colnames(dtest), colnames(test_data)) + expect_error( colnames(dtest) <- 'asdf') + new_names <- make.names(1:ncol(test_data)) + expect_silent( colnames(dtest) <- new_names) + expect_equal(colnames(dtest), new_names) + expect_silent(colnames(dtest) <- NULL) + expect_null(colnames(dtest)) +})