[R] maintenance Apr 2017 (#2237)

* [R] make sure things work for a single split model; fixes #2191

* [R] add option use_int_id to xgb.model.dt.tree

* [R] add example of exporting tree plot to a file

* [R] set save_period = NULL as default in xgboost() to be the same as in xgb.train; fixes #2182

* [R] it's a good practice after CRAN releases to bump up package version in dev

* [R] allow xgb.DMatrix construction from integer dense matrices

* [R] xgb.DMatrix: silent parameter; improve documentation

* [R] xgb.model.dt.tree code style changes

* [R] update NEWS with parameter changes

* [R] code safety & style; handle non-strict matrix and inherited classes of input and model; fixes #2242

* [R] change to x.y.z.p R-package versioning scheme and set version to 0.6.4.3

* [R] add an R package versioning section to the contributors guide

* [R] R-package/README.md: clean up the redundant old installation instructions, link the contributors guide
This commit is contained in:
Vadim Khotilovich
2017-05-02 00:51:34 -05:00
committed by Tong He
parent d769b6bcb5
commit a375ad2822
29 changed files with 351 additions and 246 deletions

View File

@@ -57,7 +57,7 @@ NULL
#' \code{\link{callbacks}}
#'
#' @export
cb.print.evaluation <- function(period=1, showsd=TRUE) {
cb.print.evaluation <- function(period = 1, showsd = TRUE) {
callback <- function(env = parent.frame()) {
if (length(env$bst_evaluation) == 0 ||
@@ -132,7 +132,7 @@ cb.evaluation.log <- function() {
cnames <- numeric(len)
cnames[c(TRUE, FALSE)] <- means
cnames[c(FALSE, TRUE)] <- stds
env$evaluation_log <- env$evaluation_log[, c('iter', cnames), with=FALSE]
env$evaluation_log <- env$evaluation_log[, c('iter', cnames), with = FALSE]
}
}
@@ -290,8 +290,8 @@ cb.reset.parameters <- function(new_params) {
#' \code{\link{xgb.attr}}
#'
#' @export
cb.early.stop <- function(stopping_rounds, maximize=FALSE,
metric_name=NULL, verbose=TRUE) {
cb.early.stop <- function(stopping_rounds, maximize = FALSE,
metric_name = NULL, verbose = TRUE) {
# state variables
best_iteration <- -1
best_ntreelimit <- -1
@@ -308,7 +308,7 @@ cb.early.stop <- function(stopping_rounds, maximize=FALSE,
metric_idx <<- which(gsub('-', '_', metric_name) == eval_names)
if (length(metric_idx) == 0)
stop("'metric_name' for early stopping is not one of the following:\n",
paste(eval_names, collapse=' '), '\n')
paste(eval_names, collapse = ' '), '\n')
}
if (is.null(metric_name) &&
length(env$bst_evaluation) > 1) {
@@ -334,7 +334,7 @@ cb.early.stop <- function(stopping_rounds, maximize=FALSE,
env$stop_condition <- FALSE
if (!is.null(env$bst)) {
if (class(env$bst) != 'xgb.Booster')
if (!inherits(env$bst, 'xgb.Booster'))
stop("'bst' in the parent frame must be an 'xgb.Booster'")
if (!is.null(best_score <- xgb.attr(env$bst$handle, 'best_score'))) {
best_score <<- as.numeric(best_score)
@@ -529,7 +529,7 @@ cb.cv.predict <- function(save_models = FALSE) {
#
# Format the evaluation metric string
format.eval.string <- function(iter, eval_res, eval_err=NULL) {
format.eval.string <- function(iter, eval_res, eval_err = NULL) {
if (length(eval_res) == 0)
stop('no evaluation results')
enames <- names(eval_res)
@@ -539,9 +539,9 @@ format.eval.string <- function(iter, eval_res, eval_err=NULL) {
if (!is.null(eval_err)) {
if (length(eval_res) != length(eval_err))
stop('eval_res & eval_err lengths mismatch')
res <- paste0(sprintf("%s:%f+%f", enames, eval_res, eval_err), collapse='\t')
res <- paste0(sprintf("%s:%f+%f", enames, eval_res, eval_err), collapse = '\t')
} else {
res <- paste0(sprintf("%s:%f", enames, eval_res), collapse='\t')
res <- paste0(sprintf("%s:%f", enames, eval_res), collapse = '\t')
}
return(paste0(iter, res))
}

View File

@@ -47,7 +47,7 @@ check.booster.params <- function(params, ...) {
multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric')
if (length(multi_names) > 0) {
warning("The following parameters were provided multiple times:\n\t",
paste(multi_names, collapse=', '), "\n Only the last value for each of them will be used.\n")
paste(multi_names, collapse = ', '), "\n Only the last value for each of them will be used.\n")
# While xgboost internals would choose the last value for a multiple-times parameter,
# enforce it here in R as well (b/c multi-parameters might be used further in R code,
# and R takes the 1st value when multiple elements with the same name are present in a list).
@@ -120,22 +120,22 @@ check.custom.eval <- function(env = parent.frame()) {
}
# Update booster with dtrain for an iteration
xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
if (class(booster) != "xgb.Booster.handle") {
stop("first argument type must be xgb.Booster.handle")
# Update a booster handle for an iteration with dtrain data
xgb.iter.update <- function(booster_handle, dtrain, iter, obj = NULL) {
if (!identical(class(booster_handle), "xgb.Booster.handle")) {
stop("booster_handle must be of xgb.Booster.handle class")
}
if (class(dtrain) != "xgb.DMatrix") {
stop("second argument type must be xgb.DMatrix")
if (!inherits(dtrain, "xgb.DMatrix")) {
stop("dtrain must be of xgb.DMatrix class")
}
if (is.null(obj)) {
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
.Call("XGBoosterUpdateOneIter_R", booster_handle, as.integer(iter), dtrain,
PACKAGE = "xgboost")
} else {
pred <- predict(booster, dtrain)
pred <- predict(booster_handle, dtrain)
gpair <- obj(pred, dtrain)
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE = "xgboost")
.Call("XGBoosterBoostOneIter_R", booster_handle, dtrain, gpair$grad, gpair$hess, PACKAGE = "xgboost")
}
return(TRUE)
}
@@ -144,16 +144,16 @@ xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
# Evaluate one iteration.
# Returns a named vector of evaluation metrics
# with the names in a 'datasetname-metricname' format.
xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
if (class(booster) != "xgb.Booster.handle")
stop("first argument type must be xgb.Booster.handle")
xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {
if (!identical(class(booster_handle), "xgb.Booster.handle"))
stop("class of booster_handle must be xgb.Booster.handle")
if (length(watchlist) == 0)
return(NULL)
evnames <- names(watchlist)
if (is.null(feval)) {
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
msg <- .Call("XGBoosterEvalOneIter_R", booster_handle, as.integer(iter), watchlist,
as.list(evnames), PACKAGE = "xgboost")
msg <- stri_split_regex(msg, '(\\s+|:|\\s+)')[[1]][-1]
res <- as.numeric(msg[c(FALSE,TRUE)]) # even indices are the values
@@ -161,7 +161,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
} else {
res <- sapply(seq_along(watchlist), function(j) {
w <- watchlist[[j]]
preds <- predict(booster, w) # predict using all trees
preds <- predict(booster_handle, w) # predict using all trees
eval_res <- feval(preds, w)
out <- eval_res$value
names(out) <- paste0(evnames[j], "-", eval_res$metric)
@@ -180,7 +180,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
# cannot do it for rank
if (exists('objective', where=params) &&
if (exists('objective', where = params) &&
is.character(params$objective) &&
strtrim(params$objective, 5) == 'rank:') {
stop("\n\tAutomatic generation of CV-folds is not implemented for ranking!\n",
@@ -195,7 +195,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
# - For classification, need to convert y labels to factor before making the folds,
# and then do stratification by factor levels.
# - For regression, leave y numeric and do stratification by quantiles.
if (exists('objective', where=params) &&
if (exists('objective', where = params) &&
is.character(params$objective)) {
# If 'objective' provided in params, assume that y is a classification label
# unless objective is reg:linear
@@ -306,7 +306,7 @@ depr_par_lut <- matrix(c(
'plot.width','plot_width',
'n_first_tree', 'trees',
'dummy', 'DUMMY'
), ncol=2, byrow = TRUE)
), ncol = 2, byrow = TRUE)
colnames(depr_par_lut) <- c('old', 'new')
# Checks the dot-parameters for deprecated names
@@ -331,7 +331,7 @@ check.deprecation <- function(..., env = parent.frame()) {
if (!ex_match[i]) {
warning("'", pars_par, "' was partially matched to '", old_par,"'")
}
.Deprecated(new_par, old=old_par, package = 'xgboost')
.Deprecated(new_par, old = old_par, package = 'xgboost')
if (new_par != 'NULL') {
eval(parse(text = paste(new_par, '<-', pars[[pars_par]])), envir = env)
}

View File

@@ -1,19 +1,19 @@
# Construct an internal xgboost Booster and return a handle to it
# Construct an internal xgboost Booster and return a handle to it.
# internal utility function
xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(cachelist) != "list" ||
any(sapply(cachelist, class) != 'xgb.DMatrix')) {
stop("xgb.Booster only accepts list of DMatrix as cachelist")
!all(sapply(cachelist, inherits, 'xgb.DMatrix'))) {
stop("cachelist must be a list of xgb.DMatrix objects")
}
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
if (!is.null(modelfile)) {
if (typeof(modelfile) == "character") {
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
.Call("XGBoosterLoadModel_R", handle, modelfile[1], PACKAGE = "xgboost")
} else if (typeof(modelfile) == "raw") {
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
} else if (class(modelfile) == "xgb.Booster") {
bst <- xgb.Booster.complete(modelfile, saveraw=TRUE)
} else if (inherits(modelfile, "xgb.Booster")) {
bst <- xgb.Booster.complete(modelfile, saveraw = TRUE)
.Call("XGBoosterLoadModelFromRaw_R", handle, bst$raw, PACKAGE = "xgboost")
} else {
stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object")
@@ -37,10 +37,10 @@ xgb.handleToBooster <- function(handle, raw = NULL) {
# Check whether xgb.Booster.handle is null
# internal utility function
is.null.handle <- function(handle) {
if (class(handle) != "xgb.Booster.handle")
if (!identical(class(handle), "xgb.Booster.handle"))
stop("argument type must be xgb.Booster.handle")
if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost"))
if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE = "xgboost"))
return(TRUE)
return(FALSE)
}
@@ -78,8 +78,8 @@ xgb.get.handle <- function(object) {
#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
#' should still work for such a model object since those methods would be using
#' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
#' \code{xgb.Booster.complete} function once after loading a model as an R-object. That which would
#' prevent further reconstruction (potentially, multiple times) of an internal booster model.
#' \code{xgb.Booster.complete} function explicitely once after loading a model as an R-object.
#' That would prevent further repeated implicit reconstruction of an internal booster model.
#'
#' @return
#' An object of \code{xgb.Booster} class.
@@ -94,13 +94,14 @@ xgb.get.handle <- function(object) {
#' bst1 <- readRDS("xgb.model.rds")
#' # the handle is invalid:
#' print(bst1$handle)
#'
#' bst1 <- xgb.Booster.complete(bst1)
#' # now the handle points to a valid internal booster model:
#' print(bst1$handle)
#'
#' @export
xgb.Booster.complete <- function(object, saveraw = TRUE) {
if (class(object) != "xgb.Booster")
if (!inherits(object, "xgb.Booster"))
stop("argument type must be xgb.Booster")
if (is.null.handle(object$handle)) {
@@ -225,7 +226,7 @@ predict.xgb.Booster <- function(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) {
object <- xgb.Booster.complete(object, saveraw = FALSE)
if (class(newdata) != "xgb.DMatrix")
if (!inherits(newdata, "xgb.DMatrix"))
newdata <- xgb.DMatrix(newdata, missing = missing)
if (is.null(ntreelimit))
ntreelimit <- NVL(object$best_ntreelimit, 0)
@@ -337,7 +338,7 @@ predict.xgb.Booster.handle <- function(object, ...) {
xgb.attr <- function(object, name) {
if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name")
handle <- xgb.get.handle(object)
.Call("XGBoosterGetAttr_R", handle, as.character(name[1]), PACKAGE="xgboost")
.Call("XGBoosterGetAttr_R", handle, as.character(name[1]), PACKAGE = "xgboost")
}
#' @rdname xgb.attr
@@ -354,7 +355,7 @@ xgb.attr <- function(object, name) {
value <- as.character(value[1])
}
}
.Call("XGBoosterSetAttr_R", handle, as.character(name[1]), value, PACKAGE="xgboost")
.Call("XGBoosterSetAttr_R", handle, as.character(name[1]), value, PACKAGE = "xgboost")
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
object$raw <- xgb.save.raw(object$handle)
}
@@ -365,10 +366,10 @@ xgb.attr <- function(object, name) {
#' @export
xgb.attributes <- function(object) {
handle <- xgb.get.handle(object)
attr_names <- .Call("XGBoosterGetAttrNames_R", handle, PACKAGE="xgboost")
attr_names <- .Call("XGBoosterGetAttrNames_R", handle, PACKAGE = "xgboost")
if (is.null(attr_names)) return(NULL)
res <- lapply(attr_names, function(x) {
.Call("XGBoosterGetAttr_R", handle, x, PACKAGE="xgboost")
.Call("XGBoosterGetAttr_R", handle, x, PACKAGE = "xgboost")
})
names(res) <- attr_names
res
@@ -393,7 +394,7 @@ xgb.attributes <- function(object) {
})
handle <- xgb.get.handle(object)
for (i in seq_along(a)) {
.Call("XGBoosterSetAttr_R", handle, names(a[i]), a[[i]], PACKAGE="xgboost")
.Call("XGBoosterSetAttr_R", handle, names(a[i]), a[[i]], PACKAGE = "xgboost")
}
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
object$raw <- xgb.save.raw(object$handle)
@@ -442,8 +443,8 @@ xgb.attributes <- function(object) {
object
}
# Extract # of trees in a model
# TODO: either add a getter to C-interface, or simply set an 'ntree' attribute after each iteration
# Extract the number of trees in a model.
# TODO: either add a getter to C-interface, or simply set an 'ntree' attribute after each iteration.
# internal utility function
xgb.ntree <- function(bst) {
length(grep('^booster', xgb.dump(bst)))
@@ -470,7 +471,7 @@ xgb.ntree <- function(bst) {
#'
#' @method print xgb.Booster
#' @export
print.xgb.Booster <- function(x, verbose=FALSE, ...) {
print.xgb.Booster <- function(x, verbose = FALSE, ...) {
cat('##### xgb.Booster\n')
valid_handle <- is.null.handle(x$handle)
@@ -479,7 +480,7 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
cat('raw: ')
if (!is.null(x$raw)) {
cat(format(object.size(x$raw), units="auto"), '\n')
cat(format(object.size(x$raw), units = "auto"), '\n')
} else {
cat('NULL\n')
}
@@ -493,7 +494,7 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
cat( ' ',
paste(names(x$params),
paste0('"', unlist(x$params), '"'),
sep=' = ', collapse=', '), '\n', sep='')
sep = ' = ', collapse = ', '), '\n', sep = '')
}
# TODO: need an interface to access all the xgboosts parameters
@@ -505,9 +506,9 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
if (verbose) {
cat( paste(paste0(' ',names(attrs)),
paste0('"', unlist(attrs), '"'),
sep=' = ', collapse='\n'), '\n', sep='')
sep = ' = ', collapse = '\n'), '\n', sep = '')
} else {
cat(' ', paste(names(attrs), collapse=', '), '\n', sep='')
cat(' ', paste(names(attrs), collapse = ', '), '\n', sep = '')
}
}
@@ -522,16 +523,16 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
if (!is.null(x$feature_names))
cat('# of features:', length(x$feature_names), '\n')
cat('niter: ', x$niter, '\n', sep='')
cat('niter: ', x$niter, '\n', sep = '')
# TODO: uncomment when faster xgb.ntree is implemented
#cat('ntree: ', xgb.ntree(x), '\n', sep='')
for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks',
'evaluation_log','niter','feature_names'))) {
if (is.atomic(x[[n]])) {
cat(n, ':', x[[n]], '\n', sep=' ')
cat(n, ':', x[[n]], '\n', sep = ' ')
} else {
cat(n, ':\n\t', sep=' ')
cat(n, ':\n\t', sep = ' ')
print(x[[n]])
}
}

View File

@@ -1,14 +1,17 @@
#' Construct xgb.DMatrix object
#'
#' Contruct xgb.DMatrix object from dense matrix, sparse matrix
#' or local file (that was created previously by saving an \code{xgb.DMatrix}).
#' Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file.
#' Supported input file formats are either a libsvm text file or a binary file that was created previously by
#' \code{\link{xgb.DMatrix.save}}).
#'
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
#' @param info a list of information of the xgb.DMatrix object
#' @param missing Missing is only used when input is dense matrix, pick a float
#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
#
#' @param ... other information to pass to \code{info}.
#' @param data a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
#' string representing a filename.
#' @param info a named list of additional information to store in the \code{xgb.DMatrix} object.
#' See \code{\link{setinfo}} for the specific allowed kinds of
#' @param missing a float value to represents missing values in data (used only when input is a dense matrix).
#' It is useful when a 0 or some other extreme value represents missing values in data.
#' @param silent whether to suppress printing an informational message after loading from a file.
#' @param ... the \code{info} data could be passed directly as parameters, without creating an \code{info} list.
#'
#' @examples
#' data(agaricus.train, package='xgboost')
@@ -17,19 +20,19 @@
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
#' @export
xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
xgb.DMatrix <- function(data, info = list(), missing = NA, silent = FALSE, ...) {
cnames <- NULL
if (typeof(data) == "character") {
if (length(data) > 1)
stop("'data' has class 'character' and length ", length(data),
".\n 'data' accepts either a numeric matrix or a single filename.")
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(silent),
PACKAGE = "xgboost")
} else if (is.matrix(data)) {
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
PACKAGE = "xgboost")
cnames <- colnames(data)
} else if (class(data) == "dgCMatrix") {
} else if (inherits(data, "dgCMatrix")) {
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, nrow(data),
PACKAGE = "xgboost")
cnames <- colnames(data)
@@ -51,10 +54,9 @@ xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
# get dmatrix from data, label
# internal helper method
xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
inClass <- class(data)
if ("dgCMatrix" %in% inClass || "matrix" %in% inClass ) {
if (inherits(data, "dgCMatrix") || is.matrix(data)) {
if (is.null(label)) {
stop("xgboost: need label when data is a matrix")
stop("label must be provided when data is a matrix")
}
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
if (!is.null(weight)){
@@ -64,11 +66,11 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
if (!is.null(label)) {
warning("xgboost: label will be ignored.")
}
if (inClass == "character") {
dtrain <- xgb.DMatrix(data)
} else if (inClass == "xgb.DMatrix") {
if (is.character(data)) {
dtrain <- xgb.DMatrix(data[1])
} else if (inherits(data, "xgb.DMatrix")) {
dtrain <- data
} else if ("data.frame" %in% inClass) {
} else if (inherits(data, "data.frame")) {
stop("xgboost doesn't support data.frame as input. Convert it to matrix first.")
} else {
stop("xgboost: invalid input data")
@@ -98,8 +100,8 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
#'
#' @export
dim.xgb.DMatrix <- function(x) {
c(.Call("XGDMatrixNumRow_R", x, PACKAGE="xgboost"),
.Call("XGDMatrixNumCol_R", x, PACKAGE="xgboost"))
c(.Call("XGDMatrixNumRow_R", x, PACKAGE = "xgboost"),
.Call("XGDMatrixNumCol_R", x, PACKAGE = "xgboost"))
}
@@ -297,8 +299,8 @@ slice <- function(object, ...) UseMethod("slice")
#' @rdname slice.xgb.DMatrix
#' @export
slice.xgb.DMatrix <- function(object, idxset, ...) {
if (class(object) != "xgb.DMatrix") {
stop("slice: first argument dtrain must be xgb.DMatrix")
if (!inherits(object, "xgb.DMatrix")) {
stop("object must be xgb.DMatrix")
}
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, PACKAGE = "xgboost")
@@ -317,7 +319,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
#' @rdname slice.xgb.DMatrix
#' @export
`[.xgb.DMatrix` <- function(object, idxset, colset=NULL) {
`[.xgb.DMatrix` <- function(object, idxset, colset = NULL) {
slice(object, idxset)
}
@@ -341,7 +343,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
#'
#' @method print xgb.DMatrix
#' @export
print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
print.xgb.DMatrix <- function(x, verbose = FALSE, ...) {
cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ')
infos <- c()
if(length(getinfo(x, 'label')) > 0) infos <- 'label'
@@ -353,7 +355,7 @@ print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
cat(' colnames:')
if (verbose & !is.null(cnames)) {
cat("\n'")
cat(cnames, sep="','")
cat(cnames, sep = "','")
cat("'")
} else {
if (is.null(cnames)) cat(' no')

View File

@@ -15,9 +15,9 @@
xgb.DMatrix.save <- function(dmatrix, fname) {
if (typeof(fname) != "character")
stop("fname must be character")
if (class(dmatrix) != "xgb.DMatrix")
stop("the input data must be xgb.DMatrix")
if (!inherits(dmatrix, "xgb.DMatrix"))
stop("dmatrix must be xgb.DMatrix")
.Call("XGDMatrixSaveBinary_R", dmatrix, fname, 0L, PACKAGE = "xgboost")
.Call("XGDMatrixSaveBinary_R", dmatrix, fname[1], 0L, PACKAGE = "xgboost")
return(TRUE)
}

View File

@@ -130,13 +130,13 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
# stop("Either 'eval_metric' or 'feval' must be provided for CV")
# Check the labels
if ( (class(data) == 'xgb.DMatrix' && is.null(getinfo(data, 'label'))) ||
(class(data) != 'xgb.DMatrix' && is.null(label)))
if ( (inherits(data, 'xgb.DMatrix') && is.null(getinfo(data, 'label'))) ||
(!inherits(data, 'xgb.DMatrix') && is.null(label)))
stop("Labels must be provided for CV either through xgb.DMatrix, or through 'label=' when 'data' is matrix")
# CV folds
if(!is.null(folds)) {
if(class(folds) != "list" || length(folds) < 2)
if(!is.list(folds) || length(folds) < 2)
stop("'folds' must be a list with 2 or more elements that are vectors of indices for each CV-fold")
nfold <- length(folds)
} else {
@@ -153,7 +153,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
params <- c(params, list(silent = 1))
print_every_n <- max( as.integer(print_every_n), 1L)
if (!has.callbacks(callbacks, 'cb.print.evaluation') && verbose) {
callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n, showsd=showsd))
callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n, showsd = showsd))
}
# evaluation log callback: always is on in CV
evaluation_log <- list()
@@ -165,12 +165,12 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
if (!is.null(early_stopping_rounds) &&
!has.callbacks(callbacks, 'cb.early.stop')) {
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
maximize=maximize, verbose=verbose))
maximize = maximize, verbose = verbose))
}
# CV-predictions callback
if (prediction &&
!has.callbacks(callbacks, 'cb.cv.predict')) {
callbacks <- add.cb(callbacks, cb.cv.predict(save_models=FALSE))
callbacks <- add.cb(callbacks, cb.cv.predict(save_models = FALSE))
}
# Sort the callbacks into categories
cb <- categorize.callbacks(callbacks)
@@ -182,7 +182,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
dtest <- slice(dall, folds[[k]])
dtrain <- slice(dall, unlist(folds[-k]))
handle <- xgb.Booster.handle(params, list(dtrain, dtest))
list(dtrain=dtrain, bst=handle, watchlist=list(train=dtrain, test=dtest), index=folds[[k]])
list(dtrain = dtrain, bst = handle, watchlist = list(train = dtrain, test=dtest), index = folds[[k]])
})
# a "basket" to collect some results from callbacks
basket <- list()
@@ -212,7 +212,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
if (stop_condition) break
}
for (f in cb$finalize) f(finalize=TRUE)
for (f in cb$finalize) f(finalize = TRUE)
# the CV result
ret <- list(
@@ -254,8 +254,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
#' @rdname print.xgb.cv
#' @method print xgb.cv.synchronous
#' @export
print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) {
cat('##### xgb.cv ', length(x$folds), '-folds\n', sep='')
print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
cat('##### xgb.cv ', length(x$folds), '-folds\n', sep = '')
if (verbose) {
if (!is.null(x$call)) {
@@ -267,7 +267,7 @@ print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) {
cat( ' ',
paste(names(x$params),
paste0('"', unlist(x$params), '"'),
sep=' = ', collapse=', '), '\n', sep='')
sep = ' = ', collapse = ', '), '\n', sep = '')
}
if (!is.null(x$callbacks) && length(x$callbacks) > 0) {
cat('callbacks:\n')
@@ -280,7 +280,7 @@ print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) {
for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
if (is.null(x[[n]]))
next
cat(n, ': ', x[[n]], '\n', sep='')
cat(n, ': ', x[[n]], '\n', sep = '')
}
if (!is.null(x$pred)) {

View File

@@ -39,19 +39,19 @@
#' cat(xgb.dump(bst, with_stats = TRUE, dump_format='json'))
#'
#' @export
xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE,
xgb.dump <- function(model, fname = NULL, fmap = "", with_stats=FALSE,
dump_format = c("text", "json"), ...) {
check.deprecation(...)
dump_format <- match.arg(dump_format)
if (class(model) != "xgb.Booster")
if (!inherits(model, "xgb.Booster"))
stop("model: argument must be of type xgb.Booster")
if (!(class(fname) %in% c("character", "NULL") && length(fname) <= 1))
stop("fname: argument must be of type character (when provided)")
if (!(class(fmap) %in% c("character", "NULL") && length(fmap) <= 1))
stop("fmap: argument must be of type character (when provided)")
if (!(is.null(fname) || is.character(fname)))
stop("fname: argument must be a character string (when provided)")
if (!(is.null(fmap) || is.character(fmap)))
stop("fmap: argument must be a character string (when provided)")
model <- xgb.Booster.complete(model)
model_dump <- .Call("XGBoosterDumpModel_R", model$handle, fmap, as.integer(with_stats),
model_dump <- .Call("XGBoosterDumpModel_R", model$handle, NVL(fmap, "")[1], as.integer(with_stats),
as.character(dump_format), PACKAGE = "xgboost")
if (is.null(fname))
@@ -65,7 +65,7 @@ xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE,
if (is.null(fname)) {
return(model_dump)
} else {
writeLines(model_dump, fname)
writeLines(model_dump, fname[1])
return(TRUE)
}
}

View File

@@ -58,13 +58,13 @@ xgb.importance <- function(feature_names = NULL, model = NULL,
if (!(is.null(data) && is.null(label) && is.null(target)))
warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")
if (class(model) != "xgb.Booster")
stop("Either 'model' has to be an object of class xgb.Booster")
if (!inherits(model, "xgb.Booster"))
stop("model: must be an object of class xgb.Booster")
if (is.null(feature_names) && !is.null(model$feature_names))
feature_names <- model$feature_names
if (!class(feature_names) %in% c("character", "NULL"))
if (!(is.null(feature_names) || is.character(feature_names)))
stop("feature_names: Has to be a character vector")
model_text_dump <- xgb.dump(model = model, with_stats = TRUE)
@@ -76,6 +76,8 @@ xgb.importance <- function(feature_names = NULL, model = NULL,
as.numeric
if(is.null(feature_names))
feature_names <- seq(to = length(weights))
if (length(feature_names) != length(weights))
stop("feature_names has less elements than there are features used in the model")
result <- data.table(Feature = feature_names, Weight = weights)[order(-abs(Weight))]
} else {
# tree model

View File

@@ -14,6 +14,8 @@
#' It could be useful, e.g., in multiclass classification to get only
#' the trees of one certain class. IMPORTANT: the tree index in xgboost models
#' is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).
#' @param use_int_id a logical flag indicating whether nodes in columns "Yes", "No", "Missing" should be
#' represented as integers (when FALSE) or as "Tree-Node" character strings (when FALSE).
#' @param ... currently not used.
#'
#' @return
@@ -22,9 +24,9 @@
#' The columns of the \code{data.table} are:
#'
#' \itemize{
#' \item \code{Tree}: ID of a tree in a model (integer)
#' \item \code{Node}: integer ID of a node in a tree (integer)
#' \item \code{ID}: identifier of a node in a model (character)
#' \item \code{Tree}: integer ID of a tree in a model (zero-based index)
#' \item \code{Node}: integer ID of a node in a tree (zero-based index)
#' \item \code{ID}: character identifier of a node in a model (only when \code{use_int_id=FALSE})
#' \item \code{Feature}: for a branch node, it's a feature id or name (when available);
#' for a leaf note, it simply labels it as \code{'Leaf'}
#' \item \code{Split}: location of the split for a branch node (split condition is always "less than")
@@ -36,6 +38,10 @@
#' or collected by a leaf during training.
#' }
#'
#' When \code{use_int_id=FALSE}, columns "Yes", "No", and "Missing" point to model-wide node identifiers
#' in the "ID" column. When \code{use_int_id=TRUE}, those columns point to node identifiers from
#' the corresponding trees in the "Node" column.
#'
#' @examples
#' # Basic use:
#'
@@ -45,8 +51,9 @@
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#'
#' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
#' # This bst has feature_names stored in it, so those would be used when
#' # the feature_names parameter is not provided:
#'
#' # This bst model already has feature_names stored with it, so those would be used when
#' # feature_names is not set:
#' (dt <- xgb.model.dt.tree(model = bst))
#'
#' # How to match feature names of splits that are following a current 'Yes' branch:
@@ -55,24 +62,24 @@
#'
#' @export
xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
trees = NULL, ...){
trees = NULL, use_int_id = FALSE, ...){
check.deprecation(...)
if (class(model) != "xgb.Booster" & class(text) != "character") {
stop("Either 'model' has to be an object of class xgb.Booster\n",
" or 'text' has to be a character vector with the result of xgb.dump\n",
" (or NULL if the model was provided).")
if (!inherits(model, "xgb.Booster") & !is.character(text)) {
stop("Either 'model' must be an object of class xgb.Booster\n",
" or 'text' must be a character vector with the result of xgb.dump\n",
" (or NULL if 'model' was provided).")
}
if (is.null(feature_names) && !is.null(model) && !is.null(model$feature_names))
feature_names <- model$feature_names
if (!class(feature_names) %in% c("character", "NULL")) {
stop("feature_names: Has to be a character vector")
if (!(is.null(feature_names) || is.character(feature_names))) {
stop("feature_names: must be a character vector")
}
if (!class(trees) %in% c("integer", "numeric", "NULL")) {
stop("trees: Has to be a vector of integers.")
if (!(is.null(trees) || is.numeric(trees))) {
stop("trees: must be a vector of integers.")
}
if (is.null(text)){
@@ -86,11 +93,11 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
position <- which(!is.na(stri_match_first_regex(text, "booster")))
add.tree.id <- function(x, i) paste(i, x, sep = "-")
add.tree.id <- function(node, tree) if (use_int_id) node else paste(tree, node, sep = "-")
anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
td <- data.table(t=text)
td <- data.table(t = text)
td[position, Tree := 1L]
td[, Tree := cumsum(ifelse(is.na(Tree), 0L, Tree)) - 1L]
@@ -102,32 +109,43 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
td <- td[Tree %in% trees & !grepl('^booster', t)]
td[, Node := stri_match_first_regex(t, "(\\d+):")[,2] %>% as.integer ]
td[, ID := add.tree.id(Node, Tree)]
if (!use_int_id) td[, ID := add.tree.id(Node, Tree)]
td[, isLeaf := !is.na(stri_match_first_regex(t, "leaf"))]
# parse branch lines
td[isLeaf==FALSE, c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover") := {
rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
# skip some indices with spurious capture groups from anynumber_regex
xtr <- stri_match_first_regex(t, rx)[, c(2,3,5,6,7,8,10)]
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
lapply(1:ncol(xtr), function(i) xtr[,i])
}]
branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
td[isLeaf == FALSE,
(branch_cols) := {
# skip some indices with spurious capture groups from anynumber_regex
xtr <- stri_match_first_regex(t, branch_rx)[, c(2,3,5,6,7,8,10), drop = FALSE]
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
lapply(1:ncol(xtr), function(i) xtr[,i])
}]
# assign feature_names when available
td[isLeaf==FALSE & !is.null(feature_names),
Feature := feature_names[as.numeric(Feature) + 1] ]
if (!is.null(feature_names)) {
if (length(feature_names) <= max(as.numeric(td$Feature), na.rm = TRUE))
stop("feature_names has less elements than there are features used in the model")
td[isLeaf == FALSE, Feature := feature_names[as.numeric(Feature) + 1] ]
}
# parse leaf lines
td[isLeaf==TRUE, c("Feature", "Quality", "Cover") := {
rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
xtr <- stri_match_first_regex(t, rx)[, c(2,4)]
c("Leaf", lapply(1:ncol(xtr), function(i) xtr[,i]))
}]
leaf_rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
leaf_cols <- c("Feature", "Quality", "Cover")
td[isLeaf == TRUE,
(leaf_cols) := {
xtr <- stri_match_first_regex(t, leaf_rx)[, c(2,4)]
c("Leaf", lapply(1:ncol(xtr), function(i) xtr[,i]))
}]
# convert some columns to numeric
numeric_cols <- c("Split", "Quality", "Cover")
td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols=numeric_cols]
td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]
if (use_int_id) {
int_cols <- c("Yes", "No", "Missing")
td[, (int_cols) := lapply(.SD, as.integer), .SDcols = int_cols]
}
td[, t := NULL]
td[, isLeaf := NULL]

View File

@@ -63,7 +63,7 @@
xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.depth", "med.weight"),
plot = TRUE, ...) {
if (!(class(model) == "xgb.Booster" || is.data.table(model)))
if (!(inherits(model, "xgb.Booster") || is.data.table(model)))
stop("model: Has to be either an xgb.Booster model generaged by the xgb.train function\n",
"or a data.table result of the xgb.importance function")
@@ -73,14 +73,14 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d
which <- match.arg(which)
dt_tree <- model
if (class(model) == "xgb.Booster")
if (inherits(model, "xgb.Booster"))
dt_tree <- xgb.model.dt.tree(model = model)
if (!all(c("Feature", "Tree", "ID", "Yes", "No", "Cover") %in% colnames(dt_tree)))
stop("Model tree columns are not as expected!\n",
" Note that this function works only for tree models.")
dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight=Quality)], by = "ID")
dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Quality)], by = "ID")
setkeyv(dt_depths, c("Tree", "ID"))
# count by depth levels, and also calculate average cover at a depth
dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]
@@ -89,13 +89,13 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d
if (plot) {
if (which == "2x1") {
op <- par(no.readonly = TRUE)
par(mfrow=c(2,1),
par(mfrow = c(2,1),
oma = c(3,1,3,1) + 0.1,
mar = c(1,4,1,0) + 0.1)
dt_summaries[, barplot(N, border=NA, ylab = 'Number of leafs', ...)]
dt_summaries[, barplot(N, border = NA, ylab = 'Number of leafs', ...)]
dt_summaries[, barplot(Cover, border=NA, ylab = "Weighted cover", names.arg=Depth, ...)]
dt_summaries[, barplot(Cover, border = NA, ylab = "Weighted cover", names.arg = Depth, ...)]
title("Model complexity", xlab = "Leaf depth", outer = TRUE, line = 1)
par(op)
@@ -119,8 +119,8 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d
get.leaf.depth <- function(dt_tree) {
# extract tree graph's edges
dt_edges <- rbindlist(list(
dt_tree[Feature != "Leaf", .(ID, To=Yes, Tree)],
dt_tree[Feature != "Leaf", .(ID, To=No, Tree)]
dt_tree[Feature != "Leaf", .(ID, To = Yes, Tree)],
dt_tree[Feature != "Leaf", .(ID, To = No, Tree)]
))
# whether "To" is a leaf:
dt_edges <-

View File

@@ -61,8 +61,8 @@
xgb.plot.importance <- function(importance_matrix = NULL, top_n = NULL, measure = NULL,
rel_to_first = FALSE, left_margin = 10, cex = NULL, plot = TRUE, ...) {
check.deprecation(...)
if (!"data.table" %in% class(importance_matrix)) {
stop("importance_matrix: Should be a data.table.")
if (!is.data.table(importance_matrix)) {
stop("importance_matrix: must be a data.table")
}
imp_names <- colnames(importance_matrix)
@@ -107,12 +107,12 @@ xgb.plot.importance <- function(importance_matrix = NULL, top_n = NULL, measure
# reverse the order of rows to have the highest ranked at the top
importance_matrix[nrow(importance_matrix):1,
barplot(Importance, horiz=TRUE, border=NA, cex.names=cex,
names.arg=Feature, las=1, ...)]
barplot(Importance, horiz = TRUE, border = NA, cex.names = cex,
names.arg = Feature, las = 1, ...)]
grid(NULL, NA)
# redraw over the grid
importance_matrix[nrow(importance_matrix):1,
barplot(Importance, horiz=TRUE, border=NA, add=TRUE)]
barplot(Importance, horiz = TRUE, border = NA, add = TRUE)]
par(op)
}

View File

@@ -11,7 +11,7 @@
#' @param plot_width the width of the diagram in pixels.
#' @param plot_height the height of the diagram in pixels.
#' @param render a logical flag for whether the graph should be rendered (see Value).
#' @param show_node_id a logical flag for whether to include node id's in the graph.
#' @param show_node_id a logical flag for whether to show node id's in the graph.
#' @param ... currently not used.
#'
#' @details
@@ -53,17 +53,25 @@
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
#' # plot all the trees
#' xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst)
#' # plot only the first tree and include the node ID:
#' xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst,
#' trees = 0, show_node_id = TRUE)
#' xgb.plot.tree(model = bst)
#' # plot only the first tree and display the node ID:
#' xgb.plot.tree(model = bst, trees = 0, show_node_id = TRUE)
#'
#' \dontrun{
#' # Below is an example of how to save this plot to a file.
#' # Note that for `export_graph` to work, the DiagrammeRsvg and rsvg packages must also be installed.
#' library(DiagrammeR)
#' gr <- xgb.plot.tree(model=bst, trees=0:1, render=FALSE)
#' export_graph(gr, 'tree.pdf', width=1500, height=1900)
#' export_graph(gr, 'tree.png', width=1500, height=1900)
#' }
#'
#' @export
xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL,
render = TRUE, show_node_id = FALSE, ...){
check.deprecation(...)
if (class(model) != "xgb.Booster") {
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
if (!inherits(model, "xgb.Booster")) {
stop("model: Has to be an object of class xgb.Booster")
}
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {

View File

@@ -32,10 +32,11 @@
xgb.save <- function(model, fname) {
if (typeof(fname) != "character")
stop("fname must be character")
if (class(model) != "xgb.Booster")
stop("the input must be xgb.Booster. Use xgb.DMatrix.save to save xgb.DMatrix object.")
if (!inherits(model, "xgb.Booster")) {
stop("model must be xgb.Booster.",
if (inherits(model, "xgb.DMatrix")) " Use xgb.DMatrix.save to save an xgb.DMatrix object." else "")
}
model <- xgb.Booster.complete(model, saveraw = FALSE)
.Call("XGBoosterSaveModel_R", model$handle, fname, PACKAGE = "xgboost")
.Call("XGBoosterSaveModel_R", model$handle, fname[1], PACKAGE = "xgboost")
return(TRUE)
}

View File

@@ -233,7 +233,7 @@
#' @rdname xgb.train
#' @export
xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
obj = NULL, feval = NULL, verbose = 1, print_every_n=1L,
obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
early_stopping_rounds = NULL, maximize = NULL,
save_period = NULL, save_name = "xgboost.model",
xgb_model = NULL, callbacks = list(), ...) {
@@ -247,11 +247,11 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
# data & watchlist checks
dtrain <- data
if (class(dtrain) != "xgb.DMatrix")
if (!inherits(dtrain, "xgb.DMatrix"))
stop("second argument dtrain must be xgb.DMatrix")
if (length(watchlist) > 0) {
if (typeof(watchlist) != "list" ||
!all(sapply(watchlist, class) == "xgb.DMatrix"))
!all(sapply(watchlist, inherits, 'xgb.DMatrix')))
stop("watchlist must be a list of xgb.DMatrix elements")
evnames <- names(watchlist)
if (is.null(evnames) || any(evnames == ""))
@@ -281,7 +281,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
if (!is.null(early_stopping_rounds) &&
!has.callbacks(callbacks, 'cb.early.stop')) {
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
maximize=maximize, verbose=verbose))
maximize = maximize, verbose = verbose))
}
# Sort the callbacks into categories
cb <- categorize.callbacks(callbacks)
@@ -332,7 +332,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
if (stop_condition) break
}
for (f in cb$finalize) f(finalize=TRUE)
for (f in cb$finalize) f(finalize = TRUE)
bst <- xgb.Booster.complete(bst, saveraw = TRUE)
@@ -343,7 +343,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
if (length(evaluation_log) > 0 &&
nrow(evaluation_log) > 0) {
# include the previous compatible history when available
if (class(xgb_model) == 'xgb.Booster' &&
if (inherits(xgb_model, 'xgb.Booster') &&
!is_update &&
!is.null(xgb_model$evaluation_log) &&
all.equal(colnames(evaluation_log),

View File

@@ -1,4 +1,4 @@
# Simple interface for training an xgboost model that wraps \code{xgb.train}
# Simple interface for training an xgboost model that wraps \code{xgb.train}.
# Its documentation is combined with xgb.train.
#
#' @rdname xgb.train
@@ -7,7 +7,7 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
params = list(), nrounds,
verbose = 1, print_every_n = 1L,
early_stopping_rounds = NULL, maximize = NULL,
save_period = 0, save_name = "xgboost.model",
save_period = NULL, save_name = "xgboost.model",
xgb_model = NULL, callbacks = list(), ...) {
dtrain <- xgb.get.DMatrix(data, label, missing, weight)