[R] various R code maintenance (#1964)

* [R] xgb.save must work when handle in nil but raw exists

* [R] print.xgb.Booster should still print other info when handle is nil

* [R] rename internal function xgb.Booster to xgb.Booster.handle to make its intent clear

* [R] rename xgb.Booster.check to xgb.Booster.complete and make it visible; more docs

* [R] storing evaluation_log should depend only on watchlist, not on verbose

* [R] reduce the excessive chattiness of unit tests

* [R] only disable some tests in windows when it's not 64-bit

* [R] clean-up xgb.DMatrix

* [R] test xgb.DMatrix loading from libsvm text file

* [R] store feature_names in xgb.Booster, use them from utility functions

* [R] remove non-functional co-occurence computation from xgb.importance

* [R] verbose=0 is enough without a callback

* [R] added forgotten xgb.Booster.complete.Rd; cran check fixes

* [R] update installation instructions
This commit is contained in:
Vadim Khotilovich
2017-01-21 13:22:46 -06:00
committed by Tianqi Chen
parent a073a2c3d4
commit 2b5b96d760
27 changed files with 561 additions and 327 deletions

View File

@@ -1,6 +1,6 @@
# Construct a Booster from cachelist
# Construct an internal xgboost Booster and return a handle to it
# internal utility function
xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(cachelist) != "list" ||
any(sapply(cachelist, class) != 'xgb.DMatrix')) {
stop("xgb.Booster only accepts list of DMatrix as cachelist")
@@ -13,8 +13,8 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
} else if (typeof(modelfile) == "raw") {
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
} else if (class(modelfile) == "xgb.Booster") {
modelfile <- xgb.Booster.check(modelfile, saveraw=TRUE)
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile$raw, PACKAGE = "xgboost")
bst <- xgb.Booster.complete(modelfile, saveraw=TRUE)
.Call("XGBoosterLoadModelFromRaw_R", handle, bst$raw, PACKAGE = "xgboost")
} else {
stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object")
}
@@ -34,6 +34,17 @@ xgb.handleToBooster <- function(handle, raw = NULL) {
return(bst)
}
# Check whether xgb.Booster.handle is null
# internal utility function
is.null.handle <- function(handle) {
if (class(handle) != "xgb.Booster.handle")
stop("argument type must be xgb.Booster.handle")
if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost"))
return(TRUE)
return(FALSE)
}
# Return a verified to be valid handle out of either xgb.Booster.handle or xgb.Booster
# internal utility function
xgb.get.handle <- function(object) {
@@ -42,32 +53,65 @@ xgb.get.handle <- function(object) {
xgb.Booster.handle = object,
stop("argument must be of either xgb.Booster or xgb.Booster.handle class")
)
if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) {
if (is.null.handle(handle)) {
stop("invalid xgb.Booster.handle")
}
handle
}
# Check whether an xgb.Booster object is complete
# internal utility function
xgb.Booster.check <- function(bst, saveraw = TRUE) {
if (class(bst) != "xgb.Booster")
#' Restore missing parts of an incomplete xgb.Booster object.
#'
#' It attempts to complete an \code{xgb.Booster} object by restoring either its missing
#' raw model memory dump (when it has no \code{raw} data but its \code{xgb.Booster.handle} is valid)
#' or its missing internal handle (when its \code{xgb.Booster.handle} is not valid
#' but it has a raw Booster memory dump).
#'
#' @param object object of class \code{xgb.Booster}
#' @param saveraw a flag indicating whether to append \code{raw} Booster memory dump data
#' when it doesn't already exist.
#'
#' @details
#'
#' While this method is primarily for internal use, it might be useful in some practical situations.
#'
#' E.g., when an \code{xgb.Booster} model is saved as an R object and then is loaded as an R object,
#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
#' should still work for such a model object since those methods would be using
#' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
#' \code{xgb.Booster.complete} function once after loading a model as an R-object. That which would
#' prevent further reconstruction (potentially, multiple times) of an internal booster model.
#'
#' @return
#' An object of \code{xgb.Booster} class.
#'
#' @examples
#'
#' data(agaricus.train, package='xgboost')
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 2,
#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' saveRDS(bst, "xgb.model.rds")
#'
#' bst1 <- readRDS("xgb.model.rds")
#' # the handle is invalid:
#' print(bst1$handle)
#' bst1 <- xgb.Booster.complete(bst1)
#' # now the handle points to a valid internal booster model:
#' print(bst1$handle)
#'
#' @export
xgb.Booster.complete <- function(object, saveraw = TRUE) {
if (class(object) != "xgb.Booster")
stop("argument type must be xgb.Booster")
isnull <- is.null(bst$handle)
if (!isnull) {
isnull <- .Call("XGCheckNullPtr_R", bst$handle, PACKAGE="xgboost")
}
if (isnull) {
bst$handle <- xgb.Booster(modelfile = bst$raw)
if (is.null.handle(object$handle)) {
object$handle <- xgb.Booster.handle(modelfile = object$raw)
} else {
if (is.null(bst$raw) && saveraw)
bst$raw <- xgb.save.raw(bst$handle)
if (is.null(object$raw) && saveraw)
object$raw <- xgb.save.raw(object$handle)
}
return(bst)
return(object)
}
#' Predict method for eXtreme Gradient Boosting model
#'
#' Predicted values based on either xgboost model or model handle object.
@@ -180,7 +224,7 @@ xgb.Booster.check <- function(bst, saveraw = TRUE) {
predict.xgb.Booster <- function(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) {
object <- xgb.Booster.check(object, saveraw = FALSE)
object <- xgb.Booster.complete(object, saveraw = FALSE)
if (class(newdata) != "xgb.DMatrix")
newdata <- xgb.DMatrix(newdata, missing = missing)
if (is.null(ntreelimit))
@@ -429,11 +473,10 @@ xgb.ntree <- function(bst) {
print.xgb.Booster <- function(x, verbose=FALSE, ...) {
cat('##### xgb.Booster\n')
if (is.null(x$handle) || .Call("XGCheckNullPtr_R", x$handle, PACKAGE="xgboost")) {
cat("handle is invalid\n")
return(x)
}
valid_handle <- is.null.handle(x$handle)
if (!valid_handle)
cat("Handle is invalid! Suggest using xgb.Booster.complete\n")
cat('raw: ')
if (!is.null(x$raw)) {
cat(format(object.size(x$raw), units="auto"), '\n')
@@ -454,7 +497,9 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
}
# TODO: need an interface to access all the xgboosts parameters
attrs <- xgb.attributes(x)
attrs <- character(0)
if (valid_handle)
attrs <- xgb.attributes(x)
if (length(attrs) > 0) {
cat('xgb.attributes:\n')
if (verbose) {
@@ -474,15 +519,19 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
})
}
if (!is.null(x$feature_names))
cat('# of features:', length(x$feature_names), '\n')
cat('niter: ', x$niter, '\n', sep='')
# TODO: uncomment when faster xgb.ntree is implemented
#cat('ntree: ', xgb.ntree(x), '\n', sep='')
for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks','evaluation_log','niter'))) {
for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks',
'evaluation_log','niter','feature_names'))) {
if (is.atomic(x[[n]])) {
cat(n, ': ', x[[n]], '\n', sep='')
cat(n, ':', x[[n]], '\n', sep=' ')
} else {
cat(n, ':\n\t', sep='')
cat(n, ':\n\t', sep=' ')
print(x[[n]])
}
}