[R] maintenance Apr 2017 (#2237)

* [R] make sure things work for a single split model; fixes #2191

* [R] add option use_int_id to xgb.model.dt.tree

* [R] add example of exporting tree plot to a file

* [R] set save_period = NULL as default in xgboost() to be the same as in xgb.train; fixes #2182

* [R] it's a good practice after CRAN releases to bump up package version in dev

* [R] allow xgb.DMatrix construction from integer dense matrices

* [R] xgb.DMatrix: silent parameter; improve documentation

* [R] xgb.model.dt.tree code style changes

* [R] update NEWS with parameter changes

* [R] code safety & style; handle non-strict matrix and inherited classes of input and model; fixes #2242

* [R] change to x.y.z.p R-package versioning scheme and set version to 0.6.4.3

* [R] add an R package versioning section to the contributors guide

* [R] R-package/README.md: clean up the redundant old installation instructions, link the contributors guide
This commit is contained in:
Vadim Khotilovich
2017-05-02 00:51:34 -05:00
committed by Tong He
parent d769b6bcb5
commit a375ad2822
29 changed files with 351 additions and 246 deletions

View File

@@ -1,14 +1,17 @@
#' Construct xgb.DMatrix object
#'
#' Contruct xgb.DMatrix object from dense matrix, sparse matrix
#' or local file (that was created previously by saving an \code{xgb.DMatrix}).
#' Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file.
#' Supported input file formats are either a libsvm text file or a binary file that was created previously by
#' \code{\link{xgb.DMatrix.save}}).
#'
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the xgb.DMatrix object
#' @param missing Missing is only used when input is dense matrix, pick a float
#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
#
#' @param ... other information to pass to \code{info}.
#' @param data a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
#' string representing a filename.
#' @param info a named list of additional information to store in the \code{xgb.DMatrix} object.
#' See \code{\link{setinfo}} for the specific allowed kinds of
#' @param missing a float value to represents missing values in data (used only when input is a dense matrix).
#' It is useful when a 0 or some other extreme value represents missing values in data.
#' @param silent whether to suppress printing an informational message after loading from a file.
#' @param ... the \code{info} data could be passed directly as parameters, without creating an \code{info} list.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
@@ -17,19 +20,19 @@
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' @export
xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
xgb.DMatrix <- function(data, info = list(), missing = NA, silent = FALSE, ...) {
cnames <- NULL
if (typeof(data) == "character") {
if (length(data) > 1)
stop("'data' has class 'character' and length ", length(data),
".\n 'data' accepts either a numeric matrix or a single filename.")
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(silent),
PACKAGE = "xgboost")
} else if (is.matrix(data)) {
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
PACKAGE = "xgboost")
cnames <- colnames(data)
} else if (class(data) == "dgCMatrix") {
} else if (inherits(data, "dgCMatrix")) {
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, nrow(data),
PACKAGE = "xgboost")
cnames <- colnames(data)
@@ -51,10 +54,9 @@ xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
# get dmatrix from data, label
# internal helper method
xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
inClass <- class(data)
if ("dgCMatrix" %in% inClass || "matrix" %in% inClass ) {
if (inherits(data, "dgCMatrix") || is.matrix(data)) {
if (is.null(label)) {
stop("xgboost: need label when data is a matrix")
stop("label must be provided when data is a matrix")
}
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
if (!is.null(weight)){
@@ -64,11 +66,11 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
if (!is.null(label)) {
warning("xgboost: label will be ignored.")
}
if (inClass == "character") {
dtrain <- xgb.DMatrix(data)
} else if (inClass == "xgb.DMatrix") {
if (is.character(data)) {
dtrain <- xgb.DMatrix(data[1])
} else if (inherits(data, "xgb.DMatrix")) {
dtrain <- data
} else if ("data.frame" %in% inClass) {
} else if (inherits(data, "data.frame")) {
stop("xgboost doesn't support data.frame as input. Convert it to matrix first.")
} else {
stop("xgboost: invalid input data")
@@ -98,8 +100,8 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
#'
#' @export
dim.xgb.DMatrix <- function(x) {
c(.Call("XGDMatrixNumRow_R", x, PACKAGE="xgboost"),
.Call("XGDMatrixNumCol_R", x, PACKAGE="xgboost"))
c(.Call("XGDMatrixNumRow_R", x, PACKAGE = "xgboost"),
.Call("XGDMatrixNumCol_R", x, PACKAGE = "xgboost"))
}
@@ -297,8 +299,8 @@ slice <- function(object, ...) UseMethod("slice")
#' @rdname slice.xgb.DMatrix
#' @export
slice.xgb.DMatrix <- function(object, idxset, ...) {
if (class(object) != "xgb.DMatrix") {
stop("slice: first argument dtrain must be xgb.DMatrix")
if (!inherits(object, "xgb.DMatrix")) {
stop("object must be xgb.DMatrix")
}
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, PACKAGE = "xgboost")
@@ -317,7 +319,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
#' @rdname slice.xgb.DMatrix
#' @export
`[.xgb.DMatrix` <- function(object, idxset, colset=NULL) {
`[.xgb.DMatrix` <- function(object, idxset, colset = NULL) {
slice(object, idxset)
}
@@ -341,7 +343,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
#'
#' @method print xgb.DMatrix
#' @export
print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
print.xgb.DMatrix <- function(x, verbose = FALSE, ...) {
cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ')
infos <- c()
if(length(getinfo(x, 'label')) > 0) infos <- 'label'
@@ -353,7 +355,7 @@ print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
cat(' colnames:')
if (verbose & !is.null(cnames)) {
cat("\n'")
cat(cnames, sep="','")
cat(cnames, sep = "','")
cat("'")
} else {
if (is.null(cnames)) cat(' no')