[R] maintenance Apr 2017 (#2237)
* [R] make sure things work for a single split model; fixes #2191 * [R] add option use_int_id to xgb.model.dt.tree * [R] add example of exporting tree plot to a file * [R] set save_period = NULL as default in xgboost() to be the same as in xgb.train; fixes #2182 * [R] it's a good practice after CRAN releases to bump up package version in dev * [R] allow xgb.DMatrix construction from integer dense matrices * [R] xgb.DMatrix: silent parameter; improve documentation * [R] xgb.model.dt.tree code style changes * [R] update NEWS with parameter changes * [R] code safety & style; handle non-strict matrix and inherited classes of input and model; fixes #2242 * [R] change to x.y.z.p R-package versioning scheme and set version to 0.6.4.3 * [R] add an R package versioning section to the contributors guide * [R] R-package/README.md: clean up the redundant old installation instructions, link the contributors guide
This commit is contained in:
parent
d769b6bcb5
commit
a375ad2822
5
NEWS.md
5
NEWS.md
@ -12,6 +12,11 @@ This file records the changes in xgboost library in reverse chronological order.
|
|||||||
- Thread local variable is upgraded so it is automatically freed at thread exit.
|
- Thread local variable is upgraded so it is automatically freed at thread exit.
|
||||||
* Migrate to C++11
|
* Migrate to C++11
|
||||||
- The current master version now requires C++11 enabled compiled(g++4.8 or higher)
|
- The current master version now requires C++11 enabled compiled(g++4.8 or higher)
|
||||||
|
* R package:
|
||||||
|
- New parameters:
|
||||||
|
- `silent` in `xgb.DMatrix()`
|
||||||
|
- `use_int_id` in `xgb.model.dt.tree()`
|
||||||
|
- Default value of the `save_period` parameter in `xgboost()` changed to NULL (consistent with `xgb.train()`).
|
||||||
|
|
||||||
## v0.6 (2016.07.29)
|
## v0.6 (2016.07.29)
|
||||||
* Version 0.5 is skipped due to major improvements in the core
|
* Version 0.5 is skipped due to major improvements in the core
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
Package: xgboost
|
Package: xgboost
|
||||||
Type: Package
|
Type: Package
|
||||||
Title: Extreme Gradient Boosting
|
Title: Extreme Gradient Boosting
|
||||||
Version: 0.6-4
|
Version: 0.6.4.3
|
||||||
Date: 2017-01-04
|
Date: 2017-01-04
|
||||||
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
|
Author: Tianqi Chen <tianqi.tchen@gmail.com>, Tong He <hetong007@gmail.com>,
|
||||||
Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
|
Michael Benesty <michael@benesty.fr>, Vadim Khotilovich <khotilovich@gmail.com>,
|
||||||
|
|||||||
@ -57,7 +57,7 @@ NULL
|
|||||||
#' \code{\link{callbacks}}
|
#' \code{\link{callbacks}}
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
cb.print.evaluation <- function(period=1, showsd=TRUE) {
|
cb.print.evaluation <- function(period = 1, showsd = TRUE) {
|
||||||
|
|
||||||
callback <- function(env = parent.frame()) {
|
callback <- function(env = parent.frame()) {
|
||||||
if (length(env$bst_evaluation) == 0 ||
|
if (length(env$bst_evaluation) == 0 ||
|
||||||
@ -132,7 +132,7 @@ cb.evaluation.log <- function() {
|
|||||||
cnames <- numeric(len)
|
cnames <- numeric(len)
|
||||||
cnames[c(TRUE, FALSE)] <- means
|
cnames[c(TRUE, FALSE)] <- means
|
||||||
cnames[c(FALSE, TRUE)] <- stds
|
cnames[c(FALSE, TRUE)] <- stds
|
||||||
env$evaluation_log <- env$evaluation_log[, c('iter', cnames), with=FALSE]
|
env$evaluation_log <- env$evaluation_log[, c('iter', cnames), with = FALSE]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -290,8 +290,8 @@ cb.reset.parameters <- function(new_params) {
|
|||||||
#' \code{\link{xgb.attr}}
|
#' \code{\link{xgb.attr}}
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
cb.early.stop <- function(stopping_rounds, maximize=FALSE,
|
cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||||
metric_name=NULL, verbose=TRUE) {
|
metric_name = NULL, verbose = TRUE) {
|
||||||
# state variables
|
# state variables
|
||||||
best_iteration <- -1
|
best_iteration <- -1
|
||||||
best_ntreelimit <- -1
|
best_ntreelimit <- -1
|
||||||
@ -308,7 +308,7 @@ cb.early.stop <- function(stopping_rounds, maximize=FALSE,
|
|||||||
metric_idx <<- which(gsub('-', '_', metric_name) == eval_names)
|
metric_idx <<- which(gsub('-', '_', metric_name) == eval_names)
|
||||||
if (length(metric_idx) == 0)
|
if (length(metric_idx) == 0)
|
||||||
stop("'metric_name' for early stopping is not one of the following:\n",
|
stop("'metric_name' for early stopping is not one of the following:\n",
|
||||||
paste(eval_names, collapse=' '), '\n')
|
paste(eval_names, collapse = ' '), '\n')
|
||||||
}
|
}
|
||||||
if (is.null(metric_name) &&
|
if (is.null(metric_name) &&
|
||||||
length(env$bst_evaluation) > 1) {
|
length(env$bst_evaluation) > 1) {
|
||||||
@ -334,7 +334,7 @@ cb.early.stop <- function(stopping_rounds, maximize=FALSE,
|
|||||||
env$stop_condition <- FALSE
|
env$stop_condition <- FALSE
|
||||||
|
|
||||||
if (!is.null(env$bst)) {
|
if (!is.null(env$bst)) {
|
||||||
if (class(env$bst) != 'xgb.Booster')
|
if (!inherits(env$bst, 'xgb.Booster'))
|
||||||
stop("'bst' in the parent frame must be an 'xgb.Booster'")
|
stop("'bst' in the parent frame must be an 'xgb.Booster'")
|
||||||
if (!is.null(best_score <- xgb.attr(env$bst$handle, 'best_score'))) {
|
if (!is.null(best_score <- xgb.attr(env$bst$handle, 'best_score'))) {
|
||||||
best_score <<- as.numeric(best_score)
|
best_score <<- as.numeric(best_score)
|
||||||
@ -529,7 +529,7 @@ cb.cv.predict <- function(save_models = FALSE) {
|
|||||||
#
|
#
|
||||||
|
|
||||||
# Format the evaluation metric string
|
# Format the evaluation metric string
|
||||||
format.eval.string <- function(iter, eval_res, eval_err=NULL) {
|
format.eval.string <- function(iter, eval_res, eval_err = NULL) {
|
||||||
if (length(eval_res) == 0)
|
if (length(eval_res) == 0)
|
||||||
stop('no evaluation results')
|
stop('no evaluation results')
|
||||||
enames <- names(eval_res)
|
enames <- names(eval_res)
|
||||||
@ -539,9 +539,9 @@ format.eval.string <- function(iter, eval_res, eval_err=NULL) {
|
|||||||
if (!is.null(eval_err)) {
|
if (!is.null(eval_err)) {
|
||||||
if (length(eval_res) != length(eval_err))
|
if (length(eval_res) != length(eval_err))
|
||||||
stop('eval_res & eval_err lengths mismatch')
|
stop('eval_res & eval_err lengths mismatch')
|
||||||
res <- paste0(sprintf("%s:%f+%f", enames, eval_res, eval_err), collapse='\t')
|
res <- paste0(sprintf("%s:%f+%f", enames, eval_res, eval_err), collapse = '\t')
|
||||||
} else {
|
} else {
|
||||||
res <- paste0(sprintf("%s:%f", enames, eval_res), collapse='\t')
|
res <- paste0(sprintf("%s:%f", enames, eval_res), collapse = '\t')
|
||||||
}
|
}
|
||||||
return(paste0(iter, res))
|
return(paste0(iter, res))
|
||||||
}
|
}
|
||||||
|
|||||||
@ -47,7 +47,7 @@ check.booster.params <- function(params, ...) {
|
|||||||
multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric')
|
multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric')
|
||||||
if (length(multi_names) > 0) {
|
if (length(multi_names) > 0) {
|
||||||
warning("The following parameters were provided multiple times:\n\t",
|
warning("The following parameters were provided multiple times:\n\t",
|
||||||
paste(multi_names, collapse=', '), "\n Only the last value for each of them will be used.\n")
|
paste(multi_names, collapse = ', '), "\n Only the last value for each of them will be used.\n")
|
||||||
# While xgboost internals would choose the last value for a multiple-times parameter,
|
# While xgboost internals would choose the last value for a multiple-times parameter,
|
||||||
# enforce it here in R as well (b/c multi-parameters might be used further in R code,
|
# enforce it here in R as well (b/c multi-parameters might be used further in R code,
|
||||||
# and R takes the 1st value when multiple elements with the same name are present in a list).
|
# and R takes the 1st value when multiple elements with the same name are present in a list).
|
||||||
@ -120,22 +120,22 @@ check.custom.eval <- function(env = parent.frame()) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Update booster with dtrain for an iteration
|
# Update a booster handle for an iteration with dtrain data
|
||||||
xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
|
xgb.iter.update <- function(booster_handle, dtrain, iter, obj = NULL) {
|
||||||
if (class(booster) != "xgb.Booster.handle") {
|
if (!identical(class(booster_handle), "xgb.Booster.handle")) {
|
||||||
stop("first argument type must be xgb.Booster.handle")
|
stop("booster_handle must be of xgb.Booster.handle class")
|
||||||
}
|
}
|
||||||
if (class(dtrain) != "xgb.DMatrix") {
|
if (!inherits(dtrain, "xgb.DMatrix")) {
|
||||||
stop("second argument type must be xgb.DMatrix")
|
stop("dtrain must be of xgb.DMatrix class")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is.null(obj)) {
|
if (is.null(obj)) {
|
||||||
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
|
.Call("XGBoosterUpdateOneIter_R", booster_handle, as.integer(iter), dtrain,
|
||||||
PACKAGE = "xgboost")
|
PACKAGE = "xgboost")
|
||||||
} else {
|
} else {
|
||||||
pred <- predict(booster, dtrain)
|
pred <- predict(booster_handle, dtrain)
|
||||||
gpair <- obj(pred, dtrain)
|
gpair <- obj(pred, dtrain)
|
||||||
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE = "xgboost")
|
.Call("XGBoosterBoostOneIter_R", booster_handle, dtrain, gpair$grad, gpair$hess, PACKAGE = "xgboost")
|
||||||
}
|
}
|
||||||
return(TRUE)
|
return(TRUE)
|
||||||
}
|
}
|
||||||
@ -144,16 +144,16 @@ xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
|
|||||||
# Evaluate one iteration.
|
# Evaluate one iteration.
|
||||||
# Returns a named vector of evaluation metrics
|
# Returns a named vector of evaluation metrics
|
||||||
# with the names in a 'datasetname-metricname' format.
|
# with the names in a 'datasetname-metricname' format.
|
||||||
xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
|
xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {
|
||||||
if (class(booster) != "xgb.Booster.handle")
|
if (!identical(class(booster_handle), "xgb.Booster.handle"))
|
||||||
stop("first argument type must be xgb.Booster.handle")
|
stop("class of booster_handle must be xgb.Booster.handle")
|
||||||
|
|
||||||
if (length(watchlist) == 0)
|
if (length(watchlist) == 0)
|
||||||
return(NULL)
|
return(NULL)
|
||||||
|
|
||||||
evnames <- names(watchlist)
|
evnames <- names(watchlist)
|
||||||
if (is.null(feval)) {
|
if (is.null(feval)) {
|
||||||
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
|
msg <- .Call("XGBoosterEvalOneIter_R", booster_handle, as.integer(iter), watchlist,
|
||||||
as.list(evnames), PACKAGE = "xgboost")
|
as.list(evnames), PACKAGE = "xgboost")
|
||||||
msg <- stri_split_regex(msg, '(\\s+|:|\\s+)')[[1]][-1]
|
msg <- stri_split_regex(msg, '(\\s+|:|\\s+)')[[1]][-1]
|
||||||
res <- as.numeric(msg[c(FALSE,TRUE)]) # even indices are the values
|
res <- as.numeric(msg[c(FALSE,TRUE)]) # even indices are the values
|
||||||
@ -161,7 +161,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
|
|||||||
} else {
|
} else {
|
||||||
res <- sapply(seq_along(watchlist), function(j) {
|
res <- sapply(seq_along(watchlist), function(j) {
|
||||||
w <- watchlist[[j]]
|
w <- watchlist[[j]]
|
||||||
preds <- predict(booster, w) # predict using all trees
|
preds <- predict(booster_handle, w) # predict using all trees
|
||||||
eval_res <- feval(preds, w)
|
eval_res <- feval(preds, w)
|
||||||
out <- eval_res$value
|
out <- eval_res$value
|
||||||
names(out) <- paste0(evnames[j], "-", eval_res$metric)
|
names(out) <- paste0(evnames[j], "-", eval_res$metric)
|
||||||
@ -180,7 +180,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
|
|||||||
generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
|
generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
|
||||||
|
|
||||||
# cannot do it for rank
|
# cannot do it for rank
|
||||||
if (exists('objective', where=params) &&
|
if (exists('objective', where = params) &&
|
||||||
is.character(params$objective) &&
|
is.character(params$objective) &&
|
||||||
strtrim(params$objective, 5) == 'rank:') {
|
strtrim(params$objective, 5) == 'rank:') {
|
||||||
stop("\n\tAutomatic generation of CV-folds is not implemented for ranking!\n",
|
stop("\n\tAutomatic generation of CV-folds is not implemented for ranking!\n",
|
||||||
@ -195,7 +195,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
|
|||||||
# - For classification, need to convert y labels to factor before making the folds,
|
# - For classification, need to convert y labels to factor before making the folds,
|
||||||
# and then do stratification by factor levels.
|
# and then do stratification by factor levels.
|
||||||
# - For regression, leave y numeric and do stratification by quantiles.
|
# - For regression, leave y numeric and do stratification by quantiles.
|
||||||
if (exists('objective', where=params) &&
|
if (exists('objective', where = params) &&
|
||||||
is.character(params$objective)) {
|
is.character(params$objective)) {
|
||||||
# If 'objective' provided in params, assume that y is a classification label
|
# If 'objective' provided in params, assume that y is a classification label
|
||||||
# unless objective is reg:linear
|
# unless objective is reg:linear
|
||||||
@ -306,7 +306,7 @@ depr_par_lut <- matrix(c(
|
|||||||
'plot.width','plot_width',
|
'plot.width','plot_width',
|
||||||
'n_first_tree', 'trees',
|
'n_first_tree', 'trees',
|
||||||
'dummy', 'DUMMY'
|
'dummy', 'DUMMY'
|
||||||
), ncol=2, byrow = TRUE)
|
), ncol = 2, byrow = TRUE)
|
||||||
colnames(depr_par_lut) <- c('old', 'new')
|
colnames(depr_par_lut) <- c('old', 'new')
|
||||||
|
|
||||||
# Checks the dot-parameters for deprecated names
|
# Checks the dot-parameters for deprecated names
|
||||||
@ -331,7 +331,7 @@ check.deprecation <- function(..., env = parent.frame()) {
|
|||||||
if (!ex_match[i]) {
|
if (!ex_match[i]) {
|
||||||
warning("'", pars_par, "' was partially matched to '", old_par,"'")
|
warning("'", pars_par, "' was partially matched to '", old_par,"'")
|
||||||
}
|
}
|
||||||
.Deprecated(new_par, old=old_par, package = 'xgboost')
|
.Deprecated(new_par, old = old_par, package = 'xgboost')
|
||||||
if (new_par != 'NULL') {
|
if (new_par != 'NULL') {
|
||||||
eval(parse(text = paste(new_par, '<-', pars[[pars_par]])), envir = env)
|
eval(parse(text = paste(new_par, '<-', pars[[pars_par]])), envir = env)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,19 +1,19 @@
|
|||||||
# Construct an internal xgboost Booster and return a handle to it
|
# Construct an internal xgboost Booster and return a handle to it.
|
||||||
# internal utility function
|
# internal utility function
|
||||||
xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) {
|
||||||
if (typeof(cachelist) != "list" ||
|
if (typeof(cachelist) != "list" ||
|
||||||
any(sapply(cachelist, class) != 'xgb.DMatrix')) {
|
!all(sapply(cachelist, inherits, 'xgb.DMatrix'))) {
|
||||||
stop("xgb.Booster only accepts list of DMatrix as cachelist")
|
stop("cachelist must be a list of xgb.DMatrix objects")
|
||||||
}
|
}
|
||||||
|
|
||||||
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
|
handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost")
|
||||||
if (!is.null(modelfile)) {
|
if (!is.null(modelfile)) {
|
||||||
if (typeof(modelfile) == "character") {
|
if (typeof(modelfile) == "character") {
|
||||||
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
|
.Call("XGBoosterLoadModel_R", handle, modelfile[1], PACKAGE = "xgboost")
|
||||||
} else if (typeof(modelfile) == "raw") {
|
} else if (typeof(modelfile) == "raw") {
|
||||||
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
|
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
|
||||||
} else if (class(modelfile) == "xgb.Booster") {
|
} else if (inherits(modelfile, "xgb.Booster")) {
|
||||||
bst <- xgb.Booster.complete(modelfile, saveraw=TRUE)
|
bst <- xgb.Booster.complete(modelfile, saveraw = TRUE)
|
||||||
.Call("XGBoosterLoadModelFromRaw_R", handle, bst$raw, PACKAGE = "xgboost")
|
.Call("XGBoosterLoadModelFromRaw_R", handle, bst$raw, PACKAGE = "xgboost")
|
||||||
} else {
|
} else {
|
||||||
stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object")
|
stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object")
|
||||||
@ -37,10 +37,10 @@ xgb.handleToBooster <- function(handle, raw = NULL) {
|
|||||||
# Check whether xgb.Booster.handle is null
|
# Check whether xgb.Booster.handle is null
|
||||||
# internal utility function
|
# internal utility function
|
||||||
is.null.handle <- function(handle) {
|
is.null.handle <- function(handle) {
|
||||||
if (class(handle) != "xgb.Booster.handle")
|
if (!identical(class(handle), "xgb.Booster.handle"))
|
||||||
stop("argument type must be xgb.Booster.handle")
|
stop("argument type must be xgb.Booster.handle")
|
||||||
|
|
||||||
if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost"))
|
if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE = "xgboost"))
|
||||||
return(TRUE)
|
return(TRUE)
|
||||||
return(FALSE)
|
return(FALSE)
|
||||||
}
|
}
|
||||||
@ -78,8 +78,8 @@ xgb.get.handle <- function(object) {
|
|||||||
#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
|
#' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
|
||||||
#' should still work for such a model object since those methods would be using
|
#' should still work for such a model object since those methods would be using
|
||||||
#' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
|
#' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
|
||||||
#' \code{xgb.Booster.complete} function once after loading a model as an R-object. That which would
|
#' \code{xgb.Booster.complete} function explicitely once after loading a model as an R-object.
|
||||||
#' prevent further reconstruction (potentially, multiple times) of an internal booster model.
|
#' That would prevent further repeated implicit reconstruction of an internal booster model.
|
||||||
#'
|
#'
|
||||||
#' @return
|
#' @return
|
||||||
#' An object of \code{xgb.Booster} class.
|
#' An object of \code{xgb.Booster} class.
|
||||||
@ -94,13 +94,14 @@ xgb.get.handle <- function(object) {
|
|||||||
#' bst1 <- readRDS("xgb.model.rds")
|
#' bst1 <- readRDS("xgb.model.rds")
|
||||||
#' # the handle is invalid:
|
#' # the handle is invalid:
|
||||||
#' print(bst1$handle)
|
#' print(bst1$handle)
|
||||||
|
#'
|
||||||
#' bst1 <- xgb.Booster.complete(bst1)
|
#' bst1 <- xgb.Booster.complete(bst1)
|
||||||
#' # now the handle points to a valid internal booster model:
|
#' # now the handle points to a valid internal booster model:
|
||||||
#' print(bst1$handle)
|
#' print(bst1$handle)
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
xgb.Booster.complete <- function(object, saveraw = TRUE) {
|
xgb.Booster.complete <- function(object, saveraw = TRUE) {
|
||||||
if (class(object) != "xgb.Booster")
|
if (!inherits(object, "xgb.Booster"))
|
||||||
stop("argument type must be xgb.Booster")
|
stop("argument type must be xgb.Booster")
|
||||||
|
|
||||||
if (is.null.handle(object$handle)) {
|
if (is.null.handle(object$handle)) {
|
||||||
@ -225,7 +226,7 @@ predict.xgb.Booster <- function(object, newdata, missing = NA,
|
|||||||
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) {
|
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) {
|
||||||
|
|
||||||
object <- xgb.Booster.complete(object, saveraw = FALSE)
|
object <- xgb.Booster.complete(object, saveraw = FALSE)
|
||||||
if (class(newdata) != "xgb.DMatrix")
|
if (!inherits(newdata, "xgb.DMatrix"))
|
||||||
newdata <- xgb.DMatrix(newdata, missing = missing)
|
newdata <- xgb.DMatrix(newdata, missing = missing)
|
||||||
if (is.null(ntreelimit))
|
if (is.null(ntreelimit))
|
||||||
ntreelimit <- NVL(object$best_ntreelimit, 0)
|
ntreelimit <- NVL(object$best_ntreelimit, 0)
|
||||||
@ -337,7 +338,7 @@ predict.xgb.Booster.handle <- function(object, ...) {
|
|||||||
xgb.attr <- function(object, name) {
|
xgb.attr <- function(object, name) {
|
||||||
if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name")
|
if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name")
|
||||||
handle <- xgb.get.handle(object)
|
handle <- xgb.get.handle(object)
|
||||||
.Call("XGBoosterGetAttr_R", handle, as.character(name[1]), PACKAGE="xgboost")
|
.Call("XGBoosterGetAttr_R", handle, as.character(name[1]), PACKAGE = "xgboost")
|
||||||
}
|
}
|
||||||
|
|
||||||
#' @rdname xgb.attr
|
#' @rdname xgb.attr
|
||||||
@ -354,7 +355,7 @@ xgb.attr <- function(object, name) {
|
|||||||
value <- as.character(value[1])
|
value <- as.character(value[1])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
.Call("XGBoosterSetAttr_R", handle, as.character(name[1]), value, PACKAGE="xgboost")
|
.Call("XGBoosterSetAttr_R", handle, as.character(name[1]), value, PACKAGE = "xgboost")
|
||||||
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
|
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
|
||||||
object$raw <- xgb.save.raw(object$handle)
|
object$raw <- xgb.save.raw(object$handle)
|
||||||
}
|
}
|
||||||
@ -365,10 +366,10 @@ xgb.attr <- function(object, name) {
|
|||||||
#' @export
|
#' @export
|
||||||
xgb.attributes <- function(object) {
|
xgb.attributes <- function(object) {
|
||||||
handle <- xgb.get.handle(object)
|
handle <- xgb.get.handle(object)
|
||||||
attr_names <- .Call("XGBoosterGetAttrNames_R", handle, PACKAGE="xgboost")
|
attr_names <- .Call("XGBoosterGetAttrNames_R", handle, PACKAGE = "xgboost")
|
||||||
if (is.null(attr_names)) return(NULL)
|
if (is.null(attr_names)) return(NULL)
|
||||||
res <- lapply(attr_names, function(x) {
|
res <- lapply(attr_names, function(x) {
|
||||||
.Call("XGBoosterGetAttr_R", handle, x, PACKAGE="xgboost")
|
.Call("XGBoosterGetAttr_R", handle, x, PACKAGE = "xgboost")
|
||||||
})
|
})
|
||||||
names(res) <- attr_names
|
names(res) <- attr_names
|
||||||
res
|
res
|
||||||
@ -393,7 +394,7 @@ xgb.attributes <- function(object) {
|
|||||||
})
|
})
|
||||||
handle <- xgb.get.handle(object)
|
handle <- xgb.get.handle(object)
|
||||||
for (i in seq_along(a)) {
|
for (i in seq_along(a)) {
|
||||||
.Call("XGBoosterSetAttr_R", handle, names(a[i]), a[[i]], PACKAGE="xgboost")
|
.Call("XGBoosterSetAttr_R", handle, names(a[i]), a[[i]], PACKAGE = "xgboost")
|
||||||
}
|
}
|
||||||
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
|
if (is(object, 'xgb.Booster') && !is.null(object$raw)) {
|
||||||
object$raw <- xgb.save.raw(object$handle)
|
object$raw <- xgb.save.raw(object$handle)
|
||||||
@ -442,8 +443,8 @@ xgb.attributes <- function(object) {
|
|||||||
object
|
object
|
||||||
}
|
}
|
||||||
|
|
||||||
# Extract # of trees in a model
|
# Extract the number of trees in a model.
|
||||||
# TODO: either add a getter to C-interface, or simply set an 'ntree' attribute after each iteration
|
# TODO: either add a getter to C-interface, or simply set an 'ntree' attribute after each iteration.
|
||||||
# internal utility function
|
# internal utility function
|
||||||
xgb.ntree <- function(bst) {
|
xgb.ntree <- function(bst) {
|
||||||
length(grep('^booster', xgb.dump(bst)))
|
length(grep('^booster', xgb.dump(bst)))
|
||||||
@ -470,7 +471,7 @@ xgb.ntree <- function(bst) {
|
|||||||
#'
|
#'
|
||||||
#' @method print xgb.Booster
|
#' @method print xgb.Booster
|
||||||
#' @export
|
#' @export
|
||||||
print.xgb.Booster <- function(x, verbose=FALSE, ...) {
|
print.xgb.Booster <- function(x, verbose = FALSE, ...) {
|
||||||
cat('##### xgb.Booster\n')
|
cat('##### xgb.Booster\n')
|
||||||
|
|
||||||
valid_handle <- is.null.handle(x$handle)
|
valid_handle <- is.null.handle(x$handle)
|
||||||
@ -479,7 +480,7 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
|
|||||||
|
|
||||||
cat('raw: ')
|
cat('raw: ')
|
||||||
if (!is.null(x$raw)) {
|
if (!is.null(x$raw)) {
|
||||||
cat(format(object.size(x$raw), units="auto"), '\n')
|
cat(format(object.size(x$raw), units = "auto"), '\n')
|
||||||
} else {
|
} else {
|
||||||
cat('NULL\n')
|
cat('NULL\n')
|
||||||
}
|
}
|
||||||
@ -493,7 +494,7 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
|
|||||||
cat( ' ',
|
cat( ' ',
|
||||||
paste(names(x$params),
|
paste(names(x$params),
|
||||||
paste0('"', unlist(x$params), '"'),
|
paste0('"', unlist(x$params), '"'),
|
||||||
sep=' = ', collapse=', '), '\n', sep='')
|
sep = ' = ', collapse = ', '), '\n', sep = '')
|
||||||
}
|
}
|
||||||
# TODO: need an interface to access all the xgboosts parameters
|
# TODO: need an interface to access all the xgboosts parameters
|
||||||
|
|
||||||
@ -505,9 +506,9 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
|
|||||||
if (verbose) {
|
if (verbose) {
|
||||||
cat( paste(paste0(' ',names(attrs)),
|
cat( paste(paste0(' ',names(attrs)),
|
||||||
paste0('"', unlist(attrs), '"'),
|
paste0('"', unlist(attrs), '"'),
|
||||||
sep=' = ', collapse='\n'), '\n', sep='')
|
sep = ' = ', collapse = '\n'), '\n', sep = '')
|
||||||
} else {
|
} else {
|
||||||
cat(' ', paste(names(attrs), collapse=', '), '\n', sep='')
|
cat(' ', paste(names(attrs), collapse = ', '), '\n', sep = '')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -522,16 +523,16 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) {
|
|||||||
if (!is.null(x$feature_names))
|
if (!is.null(x$feature_names))
|
||||||
cat('# of features:', length(x$feature_names), '\n')
|
cat('# of features:', length(x$feature_names), '\n')
|
||||||
|
|
||||||
cat('niter: ', x$niter, '\n', sep='')
|
cat('niter: ', x$niter, '\n', sep = '')
|
||||||
# TODO: uncomment when faster xgb.ntree is implemented
|
# TODO: uncomment when faster xgb.ntree is implemented
|
||||||
#cat('ntree: ', xgb.ntree(x), '\n', sep='')
|
#cat('ntree: ', xgb.ntree(x), '\n', sep='')
|
||||||
|
|
||||||
for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks',
|
for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks',
|
||||||
'evaluation_log','niter','feature_names'))) {
|
'evaluation_log','niter','feature_names'))) {
|
||||||
if (is.atomic(x[[n]])) {
|
if (is.atomic(x[[n]])) {
|
||||||
cat(n, ':', x[[n]], '\n', sep=' ')
|
cat(n, ':', x[[n]], '\n', sep = ' ')
|
||||||
} else {
|
} else {
|
||||||
cat(n, ':\n\t', sep=' ')
|
cat(n, ':\n\t', sep = ' ')
|
||||||
print(x[[n]])
|
print(x[[n]])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,14 +1,17 @@
|
|||||||
#' Construct xgb.DMatrix object
|
#' Construct xgb.DMatrix object
|
||||||
#'
|
#'
|
||||||
#' Contruct xgb.DMatrix object from dense matrix, sparse matrix
|
#' Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file.
|
||||||
#' or local file (that was created previously by saving an \code{xgb.DMatrix}).
|
#' Supported input file formats are either a libsvm text file or a binary file that was created previously by
|
||||||
|
#' \code{\link{xgb.DMatrix.save}}).
|
||||||
#'
|
#'
|
||||||
#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename
|
#' @param data a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
|
||||||
#' @param info a list of information of the xgb.DMatrix object
|
#' string representing a filename.
|
||||||
#' @param missing Missing is only used when input is dense matrix, pick a float
|
#' @param info a named list of additional information to store in the \code{xgb.DMatrix} object.
|
||||||
#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.
|
#' See \code{\link{setinfo}} for the specific allowed kinds of
|
||||||
#
|
#' @param missing a float value to represents missing values in data (used only when input is a dense matrix).
|
||||||
#' @param ... other information to pass to \code{info}.
|
#' It is useful when a 0 or some other extreme value represents missing values in data.
|
||||||
|
#' @param silent whether to suppress printing an informational message after loading from a file.
|
||||||
|
#' @param ... the \code{info} data could be passed directly as parameters, without creating an \code{info} list.
|
||||||
#'
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' data(agaricus.train, package='xgboost')
|
#' data(agaricus.train, package='xgboost')
|
||||||
@ -17,19 +20,19 @@
|
|||||||
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
#' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
|
||||||
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
#' dtrain <- xgb.DMatrix('xgb.DMatrix.data')
|
||||||
#' @export
|
#' @export
|
||||||
xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
|
xgb.DMatrix <- function(data, info = list(), missing = NA, silent = FALSE, ...) {
|
||||||
cnames <- NULL
|
cnames <- NULL
|
||||||
if (typeof(data) == "character") {
|
if (typeof(data) == "character") {
|
||||||
if (length(data) > 1)
|
if (length(data) > 1)
|
||||||
stop("'data' has class 'character' and length ", length(data),
|
stop("'data' has class 'character' and length ", length(data),
|
||||||
".\n 'data' accepts either a numeric matrix or a single filename.")
|
".\n 'data' accepts either a numeric matrix or a single filename.")
|
||||||
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
|
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(silent),
|
||||||
PACKAGE = "xgboost")
|
PACKAGE = "xgboost")
|
||||||
} else if (is.matrix(data)) {
|
} else if (is.matrix(data)) {
|
||||||
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
|
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
|
||||||
PACKAGE = "xgboost")
|
PACKAGE = "xgboost")
|
||||||
cnames <- colnames(data)
|
cnames <- colnames(data)
|
||||||
} else if (class(data) == "dgCMatrix") {
|
} else if (inherits(data, "dgCMatrix")) {
|
||||||
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, nrow(data),
|
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, nrow(data),
|
||||||
PACKAGE = "xgboost")
|
PACKAGE = "xgboost")
|
||||||
cnames <- colnames(data)
|
cnames <- colnames(data)
|
||||||
@ -51,10 +54,9 @@ xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
|
|||||||
# get dmatrix from data, label
|
# get dmatrix from data, label
|
||||||
# internal helper method
|
# internal helper method
|
||||||
xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
|
xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
|
||||||
inClass <- class(data)
|
if (inherits(data, "dgCMatrix") || is.matrix(data)) {
|
||||||
if ("dgCMatrix" %in% inClass || "matrix" %in% inClass ) {
|
|
||||||
if (is.null(label)) {
|
if (is.null(label)) {
|
||||||
stop("xgboost: need label when data is a matrix")
|
stop("label must be provided when data is a matrix")
|
||||||
}
|
}
|
||||||
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
|
dtrain <- xgb.DMatrix(data, label = label, missing = missing)
|
||||||
if (!is.null(weight)){
|
if (!is.null(weight)){
|
||||||
@ -64,11 +66,11 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
|
|||||||
if (!is.null(label)) {
|
if (!is.null(label)) {
|
||||||
warning("xgboost: label will be ignored.")
|
warning("xgboost: label will be ignored.")
|
||||||
}
|
}
|
||||||
if (inClass == "character") {
|
if (is.character(data)) {
|
||||||
dtrain <- xgb.DMatrix(data)
|
dtrain <- xgb.DMatrix(data[1])
|
||||||
} else if (inClass == "xgb.DMatrix") {
|
} else if (inherits(data, "xgb.DMatrix")) {
|
||||||
dtrain <- data
|
dtrain <- data
|
||||||
} else if ("data.frame" %in% inClass) {
|
} else if (inherits(data, "data.frame")) {
|
||||||
stop("xgboost doesn't support data.frame as input. Convert it to matrix first.")
|
stop("xgboost doesn't support data.frame as input. Convert it to matrix first.")
|
||||||
} else {
|
} else {
|
||||||
stop("xgboost: invalid input data")
|
stop("xgboost: invalid input data")
|
||||||
@ -98,8 +100,8 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
|
|||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
dim.xgb.DMatrix <- function(x) {
|
dim.xgb.DMatrix <- function(x) {
|
||||||
c(.Call("XGDMatrixNumRow_R", x, PACKAGE="xgboost"),
|
c(.Call("XGDMatrixNumRow_R", x, PACKAGE = "xgboost"),
|
||||||
.Call("XGDMatrixNumCol_R", x, PACKAGE="xgboost"))
|
.Call("XGDMatrixNumCol_R", x, PACKAGE = "xgboost"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -297,8 +299,8 @@ slice <- function(object, ...) UseMethod("slice")
|
|||||||
#' @rdname slice.xgb.DMatrix
|
#' @rdname slice.xgb.DMatrix
|
||||||
#' @export
|
#' @export
|
||||||
slice.xgb.DMatrix <- function(object, idxset, ...) {
|
slice.xgb.DMatrix <- function(object, idxset, ...) {
|
||||||
if (class(object) != "xgb.DMatrix") {
|
if (!inherits(object, "xgb.DMatrix")) {
|
||||||
stop("slice: first argument dtrain must be xgb.DMatrix")
|
stop("object must be xgb.DMatrix")
|
||||||
}
|
}
|
||||||
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, PACKAGE = "xgboost")
|
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, PACKAGE = "xgboost")
|
||||||
|
|
||||||
@ -317,7 +319,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
|
|||||||
|
|
||||||
#' @rdname slice.xgb.DMatrix
|
#' @rdname slice.xgb.DMatrix
|
||||||
#' @export
|
#' @export
|
||||||
`[.xgb.DMatrix` <- function(object, idxset, colset=NULL) {
|
`[.xgb.DMatrix` <- function(object, idxset, colset = NULL) {
|
||||||
slice(object, idxset)
|
slice(object, idxset)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -341,7 +343,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) {
|
|||||||
#'
|
#'
|
||||||
#' @method print xgb.DMatrix
|
#' @method print xgb.DMatrix
|
||||||
#' @export
|
#' @export
|
||||||
print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
|
print.xgb.DMatrix <- function(x, verbose = FALSE, ...) {
|
||||||
cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ')
|
cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ')
|
||||||
infos <- c()
|
infos <- c()
|
||||||
if(length(getinfo(x, 'label')) > 0) infos <- 'label'
|
if(length(getinfo(x, 'label')) > 0) infos <- 'label'
|
||||||
@ -353,7 +355,7 @@ print.xgb.DMatrix <- function(x, verbose=FALSE, ...) {
|
|||||||
cat(' colnames:')
|
cat(' colnames:')
|
||||||
if (verbose & !is.null(cnames)) {
|
if (verbose & !is.null(cnames)) {
|
||||||
cat("\n'")
|
cat("\n'")
|
||||||
cat(cnames, sep="','")
|
cat(cnames, sep = "','")
|
||||||
cat("'")
|
cat("'")
|
||||||
} else {
|
} else {
|
||||||
if (is.null(cnames)) cat(' no')
|
if (is.null(cnames)) cat(' no')
|
||||||
|
|||||||
@ -15,9 +15,9 @@
|
|||||||
xgb.DMatrix.save <- function(dmatrix, fname) {
|
xgb.DMatrix.save <- function(dmatrix, fname) {
|
||||||
if (typeof(fname) != "character")
|
if (typeof(fname) != "character")
|
||||||
stop("fname must be character")
|
stop("fname must be character")
|
||||||
if (class(dmatrix) != "xgb.DMatrix")
|
if (!inherits(dmatrix, "xgb.DMatrix"))
|
||||||
stop("the input data must be xgb.DMatrix")
|
stop("dmatrix must be xgb.DMatrix")
|
||||||
|
|
||||||
.Call("XGDMatrixSaveBinary_R", dmatrix, fname, 0L, PACKAGE = "xgboost")
|
.Call("XGDMatrixSaveBinary_R", dmatrix, fname[1], 0L, PACKAGE = "xgboost")
|
||||||
return(TRUE)
|
return(TRUE)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -130,13 +130,13 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
# stop("Either 'eval_metric' or 'feval' must be provided for CV")
|
# stop("Either 'eval_metric' or 'feval' must be provided for CV")
|
||||||
|
|
||||||
# Check the labels
|
# Check the labels
|
||||||
if ( (class(data) == 'xgb.DMatrix' && is.null(getinfo(data, 'label'))) ||
|
if ( (inherits(data, 'xgb.DMatrix') && is.null(getinfo(data, 'label'))) ||
|
||||||
(class(data) != 'xgb.DMatrix' && is.null(label)))
|
(!inherits(data, 'xgb.DMatrix') && is.null(label)))
|
||||||
stop("Labels must be provided for CV either through xgb.DMatrix, or through 'label=' when 'data' is matrix")
|
stop("Labels must be provided for CV either through xgb.DMatrix, or through 'label=' when 'data' is matrix")
|
||||||
|
|
||||||
# CV folds
|
# CV folds
|
||||||
if(!is.null(folds)) {
|
if(!is.null(folds)) {
|
||||||
if(class(folds) != "list" || length(folds) < 2)
|
if(!is.list(folds) || length(folds) < 2)
|
||||||
stop("'folds' must be a list with 2 or more elements that are vectors of indices for each CV-fold")
|
stop("'folds' must be a list with 2 or more elements that are vectors of indices for each CV-fold")
|
||||||
nfold <- length(folds)
|
nfold <- length(folds)
|
||||||
} else {
|
} else {
|
||||||
@ -153,7 +153,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
params <- c(params, list(silent = 1))
|
params <- c(params, list(silent = 1))
|
||||||
print_every_n <- max( as.integer(print_every_n), 1L)
|
print_every_n <- max( as.integer(print_every_n), 1L)
|
||||||
if (!has.callbacks(callbacks, 'cb.print.evaluation') && verbose) {
|
if (!has.callbacks(callbacks, 'cb.print.evaluation') && verbose) {
|
||||||
callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n, showsd=showsd))
|
callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n, showsd = showsd))
|
||||||
}
|
}
|
||||||
# evaluation log callback: always is on in CV
|
# evaluation log callback: always is on in CV
|
||||||
evaluation_log <- list()
|
evaluation_log <- list()
|
||||||
@ -165,12 +165,12 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
if (!is.null(early_stopping_rounds) &&
|
if (!is.null(early_stopping_rounds) &&
|
||||||
!has.callbacks(callbacks, 'cb.early.stop')) {
|
!has.callbacks(callbacks, 'cb.early.stop')) {
|
||||||
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
|
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
|
||||||
maximize=maximize, verbose=verbose))
|
maximize = maximize, verbose = verbose))
|
||||||
}
|
}
|
||||||
# CV-predictions callback
|
# CV-predictions callback
|
||||||
if (prediction &&
|
if (prediction &&
|
||||||
!has.callbacks(callbacks, 'cb.cv.predict')) {
|
!has.callbacks(callbacks, 'cb.cv.predict')) {
|
||||||
callbacks <- add.cb(callbacks, cb.cv.predict(save_models=FALSE))
|
callbacks <- add.cb(callbacks, cb.cv.predict(save_models = FALSE))
|
||||||
}
|
}
|
||||||
# Sort the callbacks into categories
|
# Sort the callbacks into categories
|
||||||
cb <- categorize.callbacks(callbacks)
|
cb <- categorize.callbacks(callbacks)
|
||||||
@ -182,7 +182,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
dtest <- slice(dall, folds[[k]])
|
dtest <- slice(dall, folds[[k]])
|
||||||
dtrain <- slice(dall, unlist(folds[-k]))
|
dtrain <- slice(dall, unlist(folds[-k]))
|
||||||
handle <- xgb.Booster.handle(params, list(dtrain, dtest))
|
handle <- xgb.Booster.handle(params, list(dtrain, dtest))
|
||||||
list(dtrain=dtrain, bst=handle, watchlist=list(train=dtrain, test=dtest), index=folds[[k]])
|
list(dtrain = dtrain, bst = handle, watchlist = list(train = dtrain, test=dtest), index = folds[[k]])
|
||||||
})
|
})
|
||||||
# a "basket" to collect some results from callbacks
|
# a "basket" to collect some results from callbacks
|
||||||
basket <- list()
|
basket <- list()
|
||||||
@ -212,7 +212,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
|
|
||||||
if (stop_condition) break
|
if (stop_condition) break
|
||||||
}
|
}
|
||||||
for (f in cb$finalize) f(finalize=TRUE)
|
for (f in cb$finalize) f(finalize = TRUE)
|
||||||
|
|
||||||
# the CV result
|
# the CV result
|
||||||
ret <- list(
|
ret <- list(
|
||||||
@ -254,8 +254,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing =
|
|||||||
#' @rdname print.xgb.cv
|
#' @rdname print.xgb.cv
|
||||||
#' @method print xgb.cv.synchronous
|
#' @method print xgb.cv.synchronous
|
||||||
#' @export
|
#' @export
|
||||||
print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) {
|
print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
|
||||||
cat('##### xgb.cv ', length(x$folds), '-folds\n', sep='')
|
cat('##### xgb.cv ', length(x$folds), '-folds\n', sep = '')
|
||||||
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
if (!is.null(x$call)) {
|
if (!is.null(x$call)) {
|
||||||
@ -267,7 +267,7 @@ print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) {
|
|||||||
cat( ' ',
|
cat( ' ',
|
||||||
paste(names(x$params),
|
paste(names(x$params),
|
||||||
paste0('"', unlist(x$params), '"'),
|
paste0('"', unlist(x$params), '"'),
|
||||||
sep=' = ', collapse=', '), '\n', sep='')
|
sep = ' = ', collapse = ', '), '\n', sep = '')
|
||||||
}
|
}
|
||||||
if (!is.null(x$callbacks) && length(x$callbacks) > 0) {
|
if (!is.null(x$callbacks) && length(x$callbacks) > 0) {
|
||||||
cat('callbacks:\n')
|
cat('callbacks:\n')
|
||||||
@ -280,7 +280,7 @@ print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) {
|
|||||||
for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
|
for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
|
||||||
if (is.null(x[[n]]))
|
if (is.null(x[[n]]))
|
||||||
next
|
next
|
||||||
cat(n, ': ', x[[n]], '\n', sep='')
|
cat(n, ': ', x[[n]], '\n', sep = '')
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is.null(x$pred)) {
|
if (!is.null(x$pred)) {
|
||||||
|
|||||||
@ -39,19 +39,19 @@
|
|||||||
#' cat(xgb.dump(bst, with_stats = TRUE, dump_format='json'))
|
#' cat(xgb.dump(bst, with_stats = TRUE, dump_format='json'))
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE,
|
xgb.dump <- function(model, fname = NULL, fmap = "", with_stats=FALSE,
|
||||||
dump_format = c("text", "json"), ...) {
|
dump_format = c("text", "json"), ...) {
|
||||||
check.deprecation(...)
|
check.deprecation(...)
|
||||||
dump_format <- match.arg(dump_format)
|
dump_format <- match.arg(dump_format)
|
||||||
if (class(model) != "xgb.Booster")
|
if (!inherits(model, "xgb.Booster"))
|
||||||
stop("model: argument must be of type xgb.Booster")
|
stop("model: argument must be of type xgb.Booster")
|
||||||
if (!(class(fname) %in% c("character", "NULL") && length(fname) <= 1))
|
if (!(is.null(fname) || is.character(fname)))
|
||||||
stop("fname: argument must be of type character (when provided)")
|
stop("fname: argument must be a character string (when provided)")
|
||||||
if (!(class(fmap) %in% c("character", "NULL") && length(fmap) <= 1))
|
if (!(is.null(fmap) || is.character(fmap)))
|
||||||
stop("fmap: argument must be of type character (when provided)")
|
stop("fmap: argument must be a character string (when provided)")
|
||||||
|
|
||||||
model <- xgb.Booster.complete(model)
|
model <- xgb.Booster.complete(model)
|
||||||
model_dump <- .Call("XGBoosterDumpModel_R", model$handle, fmap, as.integer(with_stats),
|
model_dump <- .Call("XGBoosterDumpModel_R", model$handle, NVL(fmap, "")[1], as.integer(with_stats),
|
||||||
as.character(dump_format), PACKAGE = "xgboost")
|
as.character(dump_format), PACKAGE = "xgboost")
|
||||||
|
|
||||||
if (is.null(fname))
|
if (is.null(fname))
|
||||||
@ -65,7 +65,7 @@ xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE,
|
|||||||
if (is.null(fname)) {
|
if (is.null(fname)) {
|
||||||
return(model_dump)
|
return(model_dump)
|
||||||
} else {
|
} else {
|
||||||
writeLines(model_dump, fname)
|
writeLines(model_dump, fname[1])
|
||||||
return(TRUE)
|
return(TRUE)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -58,13 +58,13 @@ xgb.importance <- function(feature_names = NULL, model = NULL,
|
|||||||
if (!(is.null(data) && is.null(label) && is.null(target)))
|
if (!(is.null(data) && is.null(label) && is.null(target)))
|
||||||
warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")
|
warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")
|
||||||
|
|
||||||
if (class(model) != "xgb.Booster")
|
if (!inherits(model, "xgb.Booster"))
|
||||||
stop("Either 'model' has to be an object of class xgb.Booster")
|
stop("model: must be an object of class xgb.Booster")
|
||||||
|
|
||||||
if (is.null(feature_names) && !is.null(model$feature_names))
|
if (is.null(feature_names) && !is.null(model$feature_names))
|
||||||
feature_names <- model$feature_names
|
feature_names <- model$feature_names
|
||||||
|
|
||||||
if (!class(feature_names) %in% c("character", "NULL"))
|
if (!(is.null(feature_names) || is.character(feature_names)))
|
||||||
stop("feature_names: Has to be a character vector")
|
stop("feature_names: Has to be a character vector")
|
||||||
|
|
||||||
model_text_dump <- xgb.dump(model = model, with_stats = TRUE)
|
model_text_dump <- xgb.dump(model = model, with_stats = TRUE)
|
||||||
@ -76,6 +76,8 @@ xgb.importance <- function(feature_names = NULL, model = NULL,
|
|||||||
as.numeric
|
as.numeric
|
||||||
if(is.null(feature_names))
|
if(is.null(feature_names))
|
||||||
feature_names <- seq(to = length(weights))
|
feature_names <- seq(to = length(weights))
|
||||||
|
if (length(feature_names) != length(weights))
|
||||||
|
stop("feature_names has less elements than there are features used in the model")
|
||||||
result <- data.table(Feature = feature_names, Weight = weights)[order(-abs(Weight))]
|
result <- data.table(Feature = feature_names, Weight = weights)[order(-abs(Weight))]
|
||||||
} else {
|
} else {
|
||||||
# tree model
|
# tree model
|
||||||
|
|||||||
@ -14,6 +14,8 @@
|
|||||||
#' It could be useful, e.g., in multiclass classification to get only
|
#' It could be useful, e.g., in multiclass classification to get only
|
||||||
#' the trees of one certain class. IMPORTANT: the tree index in xgboost models
|
#' the trees of one certain class. IMPORTANT: the tree index in xgboost models
|
||||||
#' is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).
|
#' is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).
|
||||||
|
#' @param use_int_id a logical flag indicating whether nodes in columns "Yes", "No", "Missing" should be
|
||||||
|
#' represented as integers (when FALSE) or as "Tree-Node" character strings (when FALSE).
|
||||||
#' @param ... currently not used.
|
#' @param ... currently not used.
|
||||||
#'
|
#'
|
||||||
#' @return
|
#' @return
|
||||||
@ -22,9 +24,9 @@
|
|||||||
#' The columns of the \code{data.table} are:
|
#' The columns of the \code{data.table} are:
|
||||||
#'
|
#'
|
||||||
#' \itemize{
|
#' \itemize{
|
||||||
#' \item \code{Tree}: ID of a tree in a model (integer)
|
#' \item \code{Tree}: integer ID of a tree in a model (zero-based index)
|
||||||
#' \item \code{Node}: integer ID of a node in a tree (integer)
|
#' \item \code{Node}: integer ID of a node in a tree (zero-based index)
|
||||||
#' \item \code{ID}: identifier of a node in a model (character)
|
#' \item \code{ID}: character identifier of a node in a model (only when \code{use_int_id=FALSE})
|
||||||
#' \item \code{Feature}: for a branch node, it's a feature id or name (when available);
|
#' \item \code{Feature}: for a branch node, it's a feature id or name (when available);
|
||||||
#' for a leaf note, it simply labels it as \code{'Leaf'}
|
#' for a leaf note, it simply labels it as \code{'Leaf'}
|
||||||
#' \item \code{Split}: location of the split for a branch node (split condition is always "less than")
|
#' \item \code{Split}: location of the split for a branch node (split condition is always "less than")
|
||||||
@ -36,6 +38,10 @@
|
|||||||
#' or collected by a leaf during training.
|
#' or collected by a leaf during training.
|
||||||
#' }
|
#' }
|
||||||
#'
|
#'
|
||||||
|
#' When \code{use_int_id=FALSE}, columns "Yes", "No", and "Missing" point to model-wide node identifiers
|
||||||
|
#' in the "ID" column. When \code{use_int_id=TRUE}, those columns point to node identifiers from
|
||||||
|
#' the corresponding trees in the "Node" column.
|
||||||
|
#'
|
||||||
#' @examples
|
#' @examples
|
||||||
#' # Basic use:
|
#' # Basic use:
|
||||||
#'
|
#'
|
||||||
@ -45,8 +51,9 @@
|
|||||||
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||||
#'
|
#'
|
||||||
#' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
|
#' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
|
||||||
#' # This bst has feature_names stored in it, so those would be used when
|
#'
|
||||||
#' # the feature_names parameter is not provided:
|
#' # This bst model already has feature_names stored with it, so those would be used when
|
||||||
|
#' # feature_names is not set:
|
||||||
#' (dt <- xgb.model.dt.tree(model = bst))
|
#' (dt <- xgb.model.dt.tree(model = bst))
|
||||||
#'
|
#'
|
||||||
#' # How to match feature names of splits that are following a current 'Yes' branch:
|
#' # How to match feature names of splits that are following a current 'Yes' branch:
|
||||||
@ -55,24 +62,24 @@
|
|||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||||
trees = NULL, ...){
|
trees = NULL, use_int_id = FALSE, ...){
|
||||||
check.deprecation(...)
|
check.deprecation(...)
|
||||||
|
|
||||||
if (class(model) != "xgb.Booster" & class(text) != "character") {
|
if (!inherits(model, "xgb.Booster") & !is.character(text)) {
|
||||||
stop("Either 'model' has to be an object of class xgb.Booster\n",
|
stop("Either 'model' must be an object of class xgb.Booster\n",
|
||||||
" or 'text' has to be a character vector with the result of xgb.dump\n",
|
" or 'text' must be a character vector with the result of xgb.dump\n",
|
||||||
" (or NULL if the model was provided).")
|
" (or NULL if 'model' was provided).")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is.null(feature_names) && !is.null(model) && !is.null(model$feature_names))
|
if (is.null(feature_names) && !is.null(model) && !is.null(model$feature_names))
|
||||||
feature_names <- model$feature_names
|
feature_names <- model$feature_names
|
||||||
|
|
||||||
if (!class(feature_names) %in% c("character", "NULL")) {
|
if (!(is.null(feature_names) || is.character(feature_names))) {
|
||||||
stop("feature_names: Has to be a character vector")
|
stop("feature_names: must be a character vector")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!class(trees) %in% c("integer", "numeric", "NULL")) {
|
if (!(is.null(trees) || is.numeric(trees))) {
|
||||||
stop("trees: Has to be a vector of integers.")
|
stop("trees: must be a vector of integers.")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is.null(text)){
|
if (is.null(text)){
|
||||||
@ -86,11 +93,11 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
|||||||
|
|
||||||
position <- which(!is.na(stri_match_first_regex(text, "booster")))
|
position <- which(!is.na(stri_match_first_regex(text, "booster")))
|
||||||
|
|
||||||
add.tree.id <- function(x, i) paste(i, x, sep = "-")
|
add.tree.id <- function(node, tree) if (use_int_id) node else paste(tree, node, sep = "-")
|
||||||
|
|
||||||
anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
|
anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?"
|
||||||
|
|
||||||
td <- data.table(t=text)
|
td <- data.table(t = text)
|
||||||
td[position, Tree := 1L]
|
td[position, Tree := 1L]
|
||||||
td[, Tree := cumsum(ifelse(is.na(Tree), 0L, Tree)) - 1L]
|
td[, Tree := cumsum(ifelse(is.na(Tree), 0L, Tree)) - 1L]
|
||||||
|
|
||||||
@ -102,32 +109,43 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
|||||||
td <- td[Tree %in% trees & !grepl('^booster', t)]
|
td <- td[Tree %in% trees & !grepl('^booster', t)]
|
||||||
|
|
||||||
td[, Node := stri_match_first_regex(t, "(\\d+):")[,2] %>% as.integer ]
|
td[, Node := stri_match_first_regex(t, "(\\d+):")[,2] %>% as.integer ]
|
||||||
td[, ID := add.tree.id(Node, Tree)]
|
if (!use_int_id) td[, ID := add.tree.id(Node, Tree)]
|
||||||
td[, isLeaf := !is.na(stri_match_first_regex(t, "leaf"))]
|
td[, isLeaf := !is.na(stri_match_first_regex(t, "leaf"))]
|
||||||
|
|
||||||
# parse branch lines
|
# parse branch lines
|
||||||
td[isLeaf==FALSE, c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover") := {
|
branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
|
||||||
rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
|
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
||||||
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover")
|
||||||
# skip some indices with spurious capture groups from anynumber_regex
|
td[isLeaf == FALSE,
|
||||||
xtr <- stri_match_first_regex(t, rx)[, c(2,3,5,6,7,8,10)]
|
(branch_cols) := {
|
||||||
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
|
# skip some indices with spurious capture groups from anynumber_regex
|
||||||
lapply(1:ncol(xtr), function(i) xtr[,i])
|
xtr <- stri_match_first_regex(t, branch_rx)[, c(2,3,5,6,7,8,10), drop = FALSE]
|
||||||
}]
|
xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree)
|
||||||
|
lapply(1:ncol(xtr), function(i) xtr[,i])
|
||||||
|
}]
|
||||||
# assign feature_names when available
|
# assign feature_names when available
|
||||||
td[isLeaf==FALSE & !is.null(feature_names),
|
if (!is.null(feature_names)) {
|
||||||
Feature := feature_names[as.numeric(Feature) + 1] ]
|
if (length(feature_names) <= max(as.numeric(td$Feature), na.rm = TRUE))
|
||||||
|
stop("feature_names has less elements than there are features used in the model")
|
||||||
|
td[isLeaf == FALSE, Feature := feature_names[as.numeric(Feature) + 1] ]
|
||||||
|
}
|
||||||
|
|
||||||
# parse leaf lines
|
# parse leaf lines
|
||||||
td[isLeaf==TRUE, c("Feature", "Quality", "Cover") := {
|
leaf_rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
||||||
rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
leaf_cols <- c("Feature", "Quality", "Cover")
|
||||||
xtr <- stri_match_first_regex(t, rx)[, c(2,4)]
|
td[isLeaf == TRUE,
|
||||||
c("Leaf", lapply(1:ncol(xtr), function(i) xtr[,i]))
|
(leaf_cols) := {
|
||||||
}]
|
xtr <- stri_match_first_regex(t, leaf_rx)[, c(2,4)]
|
||||||
|
c("Leaf", lapply(1:ncol(xtr), function(i) xtr[,i]))
|
||||||
|
}]
|
||||||
|
|
||||||
# convert some columns to numeric
|
# convert some columns to numeric
|
||||||
numeric_cols <- c("Split", "Quality", "Cover")
|
numeric_cols <- c("Split", "Quality", "Cover")
|
||||||
td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols=numeric_cols]
|
td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]
|
||||||
|
if (use_int_id) {
|
||||||
|
int_cols <- c("Yes", "No", "Missing")
|
||||||
|
td[, (int_cols) := lapply(.SD, as.integer), .SDcols = int_cols]
|
||||||
|
}
|
||||||
|
|
||||||
td[, t := NULL]
|
td[, t := NULL]
|
||||||
td[, isLeaf := NULL]
|
td[, isLeaf := NULL]
|
||||||
|
|||||||
@ -63,7 +63,7 @@
|
|||||||
xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.depth", "med.weight"),
|
xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.depth", "med.weight"),
|
||||||
plot = TRUE, ...) {
|
plot = TRUE, ...) {
|
||||||
|
|
||||||
if (!(class(model) == "xgb.Booster" || is.data.table(model)))
|
if (!(inherits(model, "xgb.Booster") || is.data.table(model)))
|
||||||
stop("model: Has to be either an xgb.Booster model generaged by the xgb.train function\n",
|
stop("model: Has to be either an xgb.Booster model generaged by the xgb.train function\n",
|
||||||
"or a data.table result of the xgb.importance function")
|
"or a data.table result of the xgb.importance function")
|
||||||
|
|
||||||
@ -73,14 +73,14 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d
|
|||||||
which <- match.arg(which)
|
which <- match.arg(which)
|
||||||
|
|
||||||
dt_tree <- model
|
dt_tree <- model
|
||||||
if (class(model) == "xgb.Booster")
|
if (inherits(model, "xgb.Booster"))
|
||||||
dt_tree <- xgb.model.dt.tree(model = model)
|
dt_tree <- xgb.model.dt.tree(model = model)
|
||||||
|
|
||||||
if (!all(c("Feature", "Tree", "ID", "Yes", "No", "Cover") %in% colnames(dt_tree)))
|
if (!all(c("Feature", "Tree", "ID", "Yes", "No", "Cover") %in% colnames(dt_tree)))
|
||||||
stop("Model tree columns are not as expected!\n",
|
stop("Model tree columns are not as expected!\n",
|
||||||
" Note that this function works only for tree models.")
|
" Note that this function works only for tree models.")
|
||||||
|
|
||||||
dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight=Quality)], by = "ID")
|
dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Quality)], by = "ID")
|
||||||
setkeyv(dt_depths, c("Tree", "ID"))
|
setkeyv(dt_depths, c("Tree", "ID"))
|
||||||
# count by depth levels, and also calculate average cover at a depth
|
# count by depth levels, and also calculate average cover at a depth
|
||||||
dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]
|
dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth]
|
||||||
@ -89,13 +89,13 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d
|
|||||||
if (plot) {
|
if (plot) {
|
||||||
if (which == "2x1") {
|
if (which == "2x1") {
|
||||||
op <- par(no.readonly = TRUE)
|
op <- par(no.readonly = TRUE)
|
||||||
par(mfrow=c(2,1),
|
par(mfrow = c(2,1),
|
||||||
oma = c(3,1,3,1) + 0.1,
|
oma = c(3,1,3,1) + 0.1,
|
||||||
mar = c(1,4,1,0) + 0.1)
|
mar = c(1,4,1,0) + 0.1)
|
||||||
|
|
||||||
dt_summaries[, barplot(N, border=NA, ylab = 'Number of leafs', ...)]
|
dt_summaries[, barplot(N, border = NA, ylab = 'Number of leafs', ...)]
|
||||||
|
|
||||||
dt_summaries[, barplot(Cover, border=NA, ylab = "Weighted cover", names.arg=Depth, ...)]
|
dt_summaries[, barplot(Cover, border = NA, ylab = "Weighted cover", names.arg = Depth, ...)]
|
||||||
|
|
||||||
title("Model complexity", xlab = "Leaf depth", outer = TRUE, line = 1)
|
title("Model complexity", xlab = "Leaf depth", outer = TRUE, line = 1)
|
||||||
par(op)
|
par(op)
|
||||||
@ -119,8 +119,8 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d
|
|||||||
get.leaf.depth <- function(dt_tree) {
|
get.leaf.depth <- function(dt_tree) {
|
||||||
# extract tree graph's edges
|
# extract tree graph's edges
|
||||||
dt_edges <- rbindlist(list(
|
dt_edges <- rbindlist(list(
|
||||||
dt_tree[Feature != "Leaf", .(ID, To=Yes, Tree)],
|
dt_tree[Feature != "Leaf", .(ID, To = Yes, Tree)],
|
||||||
dt_tree[Feature != "Leaf", .(ID, To=No, Tree)]
|
dt_tree[Feature != "Leaf", .(ID, To = No, Tree)]
|
||||||
))
|
))
|
||||||
# whether "To" is a leaf:
|
# whether "To" is a leaf:
|
||||||
dt_edges <-
|
dt_edges <-
|
||||||
|
|||||||
@ -61,8 +61,8 @@
|
|||||||
xgb.plot.importance <- function(importance_matrix = NULL, top_n = NULL, measure = NULL,
|
xgb.plot.importance <- function(importance_matrix = NULL, top_n = NULL, measure = NULL,
|
||||||
rel_to_first = FALSE, left_margin = 10, cex = NULL, plot = TRUE, ...) {
|
rel_to_first = FALSE, left_margin = 10, cex = NULL, plot = TRUE, ...) {
|
||||||
check.deprecation(...)
|
check.deprecation(...)
|
||||||
if (!"data.table" %in% class(importance_matrix)) {
|
if (!is.data.table(importance_matrix)) {
|
||||||
stop("importance_matrix: Should be a data.table.")
|
stop("importance_matrix: must be a data.table")
|
||||||
}
|
}
|
||||||
|
|
||||||
imp_names <- colnames(importance_matrix)
|
imp_names <- colnames(importance_matrix)
|
||||||
@ -107,12 +107,12 @@ xgb.plot.importance <- function(importance_matrix = NULL, top_n = NULL, measure
|
|||||||
|
|
||||||
# reverse the order of rows to have the highest ranked at the top
|
# reverse the order of rows to have the highest ranked at the top
|
||||||
importance_matrix[nrow(importance_matrix):1,
|
importance_matrix[nrow(importance_matrix):1,
|
||||||
barplot(Importance, horiz=TRUE, border=NA, cex.names=cex,
|
barplot(Importance, horiz = TRUE, border = NA, cex.names = cex,
|
||||||
names.arg=Feature, las=1, ...)]
|
names.arg = Feature, las = 1, ...)]
|
||||||
grid(NULL, NA)
|
grid(NULL, NA)
|
||||||
# redraw over the grid
|
# redraw over the grid
|
||||||
importance_matrix[nrow(importance_matrix):1,
|
importance_matrix[nrow(importance_matrix):1,
|
||||||
barplot(Importance, horiz=TRUE, border=NA, add=TRUE)]
|
barplot(Importance, horiz = TRUE, border = NA, add = TRUE)]
|
||||||
par(op)
|
par(op)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -11,7 +11,7 @@
|
|||||||
#' @param plot_width the width of the diagram in pixels.
|
#' @param plot_width the width of the diagram in pixels.
|
||||||
#' @param plot_height the height of the diagram in pixels.
|
#' @param plot_height the height of the diagram in pixels.
|
||||||
#' @param render a logical flag for whether the graph should be rendered (see Value).
|
#' @param render a logical flag for whether the graph should be rendered (see Value).
|
||||||
#' @param show_node_id a logical flag for whether to include node id's in the graph.
|
#' @param show_node_id a logical flag for whether to show node id's in the graph.
|
||||||
#' @param ... currently not used.
|
#' @param ... currently not used.
|
||||||
#'
|
#'
|
||||||
#' @details
|
#' @details
|
||||||
@ -53,17 +53,25 @@
|
|||||||
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
|
#' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
|
||||||
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||||
#' # plot all the trees
|
#' # plot all the trees
|
||||||
#' xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst)
|
#' xgb.plot.tree(model = bst)
|
||||||
#' # plot only the first tree and include the node ID:
|
#' # plot only the first tree and display the node ID:
|
||||||
#' xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst,
|
#' xgb.plot.tree(model = bst, trees = 0, show_node_id = TRUE)
|
||||||
#' trees = 0, show_node_id = TRUE)
|
#'
|
||||||
|
#' \dontrun{
|
||||||
|
#' # Below is an example of how to save this plot to a file.
|
||||||
|
#' # Note that for `export_graph` to work, the DiagrammeRsvg and rsvg packages must also be installed.
|
||||||
|
#' library(DiagrammeR)
|
||||||
|
#' gr <- xgb.plot.tree(model=bst, trees=0:1, render=FALSE)
|
||||||
|
#' export_graph(gr, 'tree.pdf', width=1500, height=1900)
|
||||||
|
#' export_graph(gr, 'tree.png', width=1500, height=1900)
|
||||||
|
#' }
|
||||||
#'
|
#'
|
||||||
#' @export
|
#' @export
|
||||||
xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL,
|
xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL,
|
||||||
render = TRUE, show_node_id = FALSE, ...){
|
render = TRUE, show_node_id = FALSE, ...){
|
||||||
check.deprecation(...)
|
check.deprecation(...)
|
||||||
if (class(model) != "xgb.Booster") {
|
if (!inherits(model, "xgb.Booster")) {
|
||||||
stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.")
|
stop("model: Has to be an object of class xgb.Booster")
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
|
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
|
||||||
|
|||||||
@ -32,10 +32,11 @@
|
|||||||
xgb.save <- function(model, fname) {
|
xgb.save <- function(model, fname) {
|
||||||
if (typeof(fname) != "character")
|
if (typeof(fname) != "character")
|
||||||
stop("fname must be character")
|
stop("fname must be character")
|
||||||
if (class(model) != "xgb.Booster")
|
if (!inherits(model, "xgb.Booster")) {
|
||||||
stop("the input must be xgb.Booster. Use xgb.DMatrix.save to save xgb.DMatrix object.")
|
stop("model must be xgb.Booster.",
|
||||||
|
if (inherits(model, "xgb.DMatrix")) " Use xgb.DMatrix.save to save an xgb.DMatrix object." else "")
|
||||||
|
}
|
||||||
model <- xgb.Booster.complete(model, saveraw = FALSE)
|
model <- xgb.Booster.complete(model, saveraw = FALSE)
|
||||||
.Call("XGBoosterSaveModel_R", model$handle, fname, PACKAGE = "xgboost")
|
.Call("XGBoosterSaveModel_R", model$handle, fname[1], PACKAGE = "xgboost")
|
||||||
return(TRUE)
|
return(TRUE)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -233,7 +233,7 @@
|
|||||||
#' @rdname xgb.train
|
#' @rdname xgb.train
|
||||||
#' @export
|
#' @export
|
||||||
xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
||||||
obj = NULL, feval = NULL, verbose = 1, print_every_n=1L,
|
obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
|
||||||
early_stopping_rounds = NULL, maximize = NULL,
|
early_stopping_rounds = NULL, maximize = NULL,
|
||||||
save_period = NULL, save_name = "xgboost.model",
|
save_period = NULL, save_name = "xgboost.model",
|
||||||
xgb_model = NULL, callbacks = list(), ...) {
|
xgb_model = NULL, callbacks = list(), ...) {
|
||||||
@ -247,11 +247,11 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
|||||||
|
|
||||||
# data & watchlist checks
|
# data & watchlist checks
|
||||||
dtrain <- data
|
dtrain <- data
|
||||||
if (class(dtrain) != "xgb.DMatrix")
|
if (!inherits(dtrain, "xgb.DMatrix"))
|
||||||
stop("second argument dtrain must be xgb.DMatrix")
|
stop("second argument dtrain must be xgb.DMatrix")
|
||||||
if (length(watchlist) > 0) {
|
if (length(watchlist) > 0) {
|
||||||
if (typeof(watchlist) != "list" ||
|
if (typeof(watchlist) != "list" ||
|
||||||
!all(sapply(watchlist, class) == "xgb.DMatrix"))
|
!all(sapply(watchlist, inherits, 'xgb.DMatrix')))
|
||||||
stop("watchlist must be a list of xgb.DMatrix elements")
|
stop("watchlist must be a list of xgb.DMatrix elements")
|
||||||
evnames <- names(watchlist)
|
evnames <- names(watchlist)
|
||||||
if (is.null(evnames) || any(evnames == ""))
|
if (is.null(evnames) || any(evnames == ""))
|
||||||
@ -281,7 +281,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
|||||||
if (!is.null(early_stopping_rounds) &&
|
if (!is.null(early_stopping_rounds) &&
|
||||||
!has.callbacks(callbacks, 'cb.early.stop')) {
|
!has.callbacks(callbacks, 'cb.early.stop')) {
|
||||||
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
|
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
|
||||||
maximize=maximize, verbose=verbose))
|
maximize = maximize, verbose = verbose))
|
||||||
}
|
}
|
||||||
# Sort the callbacks into categories
|
# Sort the callbacks into categories
|
||||||
cb <- categorize.callbacks(callbacks)
|
cb <- categorize.callbacks(callbacks)
|
||||||
@ -332,7 +332,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
|||||||
|
|
||||||
if (stop_condition) break
|
if (stop_condition) break
|
||||||
}
|
}
|
||||||
for (f in cb$finalize) f(finalize=TRUE)
|
for (f in cb$finalize) f(finalize = TRUE)
|
||||||
|
|
||||||
bst <- xgb.Booster.complete(bst, saveraw = TRUE)
|
bst <- xgb.Booster.complete(bst, saveraw = TRUE)
|
||||||
|
|
||||||
@ -343,7 +343,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
|||||||
if (length(evaluation_log) > 0 &&
|
if (length(evaluation_log) > 0 &&
|
||||||
nrow(evaluation_log) > 0) {
|
nrow(evaluation_log) > 0) {
|
||||||
# include the previous compatible history when available
|
# include the previous compatible history when available
|
||||||
if (class(xgb_model) == 'xgb.Booster' &&
|
if (inherits(xgb_model, 'xgb.Booster') &&
|
||||||
!is_update &&
|
!is_update &&
|
||||||
!is.null(xgb_model$evaluation_log) &&
|
!is.null(xgb_model$evaluation_log) &&
|
||||||
all.equal(colnames(evaluation_log),
|
all.equal(colnames(evaluation_log),
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# Simple interface for training an xgboost model that wraps \code{xgb.train}
|
# Simple interface for training an xgboost model that wraps \code{xgb.train}.
|
||||||
# Its documentation is combined with xgb.train.
|
# Its documentation is combined with xgb.train.
|
||||||
#
|
#
|
||||||
#' @rdname xgb.train
|
#' @rdname xgb.train
|
||||||
@ -7,7 +7,7 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
|
|||||||
params = list(), nrounds,
|
params = list(), nrounds,
|
||||||
verbose = 1, print_every_n = 1L,
|
verbose = 1, print_every_n = 1L,
|
||||||
early_stopping_rounds = NULL, maximize = NULL,
|
early_stopping_rounds = NULL, maximize = NULL,
|
||||||
save_period = 0, save_name = "xgboost.model",
|
save_period = NULL, save_name = "xgboost.model",
|
||||||
xgb_model = NULL, callbacks = list(), ...) {
|
xgb_model = NULL, callbacks = list(), ...) {
|
||||||
|
|
||||||
dtrain <- xgb.get.DMatrix(data, label, missing, weight)
|
dtrain <- xgb.get.DMatrix(data, label, missing, weight)
|
||||||
|
|||||||
@ -19,49 +19,6 @@ We are [on CRAN](https://cran.r-project.org/web/packages/xgboost/index.html) now
|
|||||||
install.packages('xgboost')
|
install.packages('xgboost')
|
||||||
```
|
```
|
||||||
|
|
||||||
You can also install from our weekly updated drat repo:
|
|
||||||
```r
|
|
||||||
install.packages("xgboost", repos=c("http://dmlc.ml/drat/", getOption("repos")), type="source")
|
|
||||||
```
|
|
||||||
|
|
||||||
***Important*** Due to the usage of submodule, `install_github` is no longer support to install the
|
|
||||||
latest version of R package.
|
|
||||||
For up-to-date version, please install from github.
|
|
||||||
|
|
||||||
Windows users will need to install [RTools](https://cran.r-project.org/bin/windows/Rtools/) first. They also need to download [MinGW-W64](http://iweb.dl.sourceforge.net/project/mingw-w64/Toolchains%20targetting%20Win32/Personal%20Builds/mingw-builds/installer/mingw-w64-install.exe) using x86_64 architecture during installation.
|
|
||||||
|
|
||||||
Run the following command to add MinGW to PATH in Windows if not already added.
|
|
||||||
|
|
||||||
```cmd
|
|
||||||
PATH %PATH%;C:\Program Files\mingw-w64\x86_64-5.3.0-posix-seh-rt_v4-rev0\mingw64\bin
|
|
||||||
```
|
|
||||||
|
|
||||||
To compile xgboost at the root of your storage, run the following bash script.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone --recursive https://github.com/dmlc/xgboost
|
|
||||||
cd xgboost
|
|
||||||
git submodule init
|
|
||||||
git submodule update
|
|
||||||
alias make='mingw32-make'
|
|
||||||
cd dmlc-core
|
|
||||||
make -j4
|
|
||||||
cd ../rabit
|
|
||||||
make lib/librabit_empty.a -j4
|
|
||||||
cd ..
|
|
||||||
cp make/mingw64.mk config.mk
|
|
||||||
make -j4
|
|
||||||
```
|
|
||||||
|
|
||||||
Run the following R script to install xgboost package from the root directory.
|
|
||||||
|
|
||||||
```r
|
|
||||||
install.packages('devtools') # if not installed
|
|
||||||
setwd('C:/xgboost/')
|
|
||||||
library(devtools)
|
|
||||||
install('R-package')
|
|
||||||
```
|
|
||||||
|
|
||||||
For more detailed installation instructions, please see [here](http://xgboost.readthedocs.org/en/latest/build.html#r-package-installation).
|
For more detailed installation instructions, please see [here](http://xgboost.readthedocs.org/en/latest/build.html#r-package-installation).
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
@ -69,3 +26,8 @@ Examples
|
|||||||
|
|
||||||
* Please visit [walk through example](demo).
|
* Please visit [walk through example](demo).
|
||||||
* See also the [example scripts](../demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](../demo/kaggle-higgs/speedtest.R) on this dataset and the one related to [Otto challenge](../demo/kaggle-otto), including a [RMarkdown documentation](../demo/kaggle-otto/understandingXGBoostModel.Rmd).
|
* See also the [example scripts](../demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](../demo/kaggle-higgs/speedtest.R) on this dataset and the one related to [Otto challenge](../demo/kaggle-otto), including a [RMarkdown documentation](../demo/kaggle-otto/understandingXGBoostModel.Rmd).
|
||||||
|
|
||||||
|
Development
|
||||||
|
-----------
|
||||||
|
|
||||||
|
* See the [R Package section](https://xgboost.readthedocs.io/en/latest/how_to/contribute.html#r-package) of the contributiors guide.
|
||||||
|
|||||||
@ -28,8 +28,8 @@ E.g., when an \code{xgb.Booster} model is saved as an R object and then is loade
|
|||||||
its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
|
its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods
|
||||||
should still work for such a model object since those methods would be using
|
should still work for such a model object since those methods would be using
|
||||||
\code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
|
\code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the
|
||||||
\code{xgb.Booster.complete} function once after loading a model as an R-object. That which would
|
\code{xgb.Booster.complete} function explicitely once after loading a model as an R-object.
|
||||||
prevent further reconstruction (potentially, multiple times) of an internal booster model.
|
That would prevent further repeated implicit reconstruction of an internal booster model.
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
|
|
||||||
@ -41,6 +41,7 @@ saveRDS(bst, "xgb.model.rds")
|
|||||||
bst1 <- readRDS("xgb.model.rds")
|
bst1 <- readRDS("xgb.model.rds")
|
||||||
# the handle is invalid:
|
# the handle is invalid:
|
||||||
print(bst1$handle)
|
print(bst1$handle)
|
||||||
|
|
||||||
bst1 <- xgb.Booster.complete(bst1)
|
bst1 <- xgb.Booster.complete(bst1)
|
||||||
# now the handle points to a valid internal booster model:
|
# now the handle points to a valid internal booster model:
|
||||||
print(bst1$handle)
|
print(bst1$handle)
|
||||||
|
|||||||
@ -2,23 +2,28 @@
|
|||||||
% Please edit documentation in R/xgb.DMatrix.R
|
% Please edit documentation in R/xgb.DMatrix.R
|
||||||
\name{xgb.DMatrix}
|
\name{xgb.DMatrix}
|
||||||
\alias{xgb.DMatrix}
|
\alias{xgb.DMatrix}
|
||||||
\title{Contruct xgb.DMatrix object}
|
\title{Construct xgb.DMatrix object}
|
||||||
\usage{
|
\usage{
|
||||||
xgb.DMatrix(data, info = list(), missing = NA, ...)
|
xgb.DMatrix(data, info = list(), missing = NA, silent = FALSE, ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename}
|
\item{data}{a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character
|
||||||
|
string representing a filename.}
|
||||||
|
|
||||||
\item{info}{a list of information of the xgb.DMatrix object}
|
\item{info}{a named list of additional information to store in the \code{xgb.DMatrix} object.
|
||||||
|
See \code{\link{setinfo}} for the specific allowed kinds of}
|
||||||
|
|
||||||
\item{missing}{Missing is only used when input is dense matrix, pick a float
|
\item{missing}{a float value to represents missing values in data (used only when input is a dense matrix).
|
||||||
value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.}
|
It is useful when a 0 or some other extreme value represents missing values in data.}
|
||||||
|
|
||||||
\item{...}{other information to pass to \code{info}.}
|
\item{silent}{whether to suppress printing an informational message after loading from a file.}
|
||||||
|
|
||||||
|
\item{...}{the \code{info} data could be passed directly as parameters, without creating an \code{info} list.}
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Contruct xgb.DMatrix object from dense matrix, sparse matrix
|
Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file.
|
||||||
or local file (that was created previously by saving an \code{xgb.DMatrix}).
|
Supported input file formats are either a libsvm text file or a binary file that was created previously by
|
||||||
|
\code{\link{xgb.DMatrix.save}}).
|
||||||
}
|
}
|
||||||
\examples{
|
\examples{
|
||||||
data(agaricus.train, package='xgboost')
|
data(agaricus.train, package='xgboost')
|
||||||
|
|||||||
@ -4,7 +4,7 @@
|
|||||||
\alias{xgb.dump}
|
\alias{xgb.dump}
|
||||||
\title{Dump an xgboost model in text format.}
|
\title{Dump an xgboost model in text format.}
|
||||||
\usage{
|
\usage{
|
||||||
xgb.dump(model = NULL, fname = NULL, fmap = "", with_stats = FALSE,
|
xgb.dump(model, fname = NULL, fmap = "", with_stats = FALSE,
|
||||||
dump_format = c("text", "json"), ...)
|
dump_format = c("text", "json"), ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
|
|||||||
@ -5,7 +5,7 @@
|
|||||||
\title{Parse a boosted tree model text dump}
|
\title{Parse a boosted tree model text dump}
|
||||||
\usage{
|
\usage{
|
||||||
xgb.model.dt.tree(feature_names = NULL, model = NULL, text = NULL,
|
xgb.model.dt.tree(feature_names = NULL, model = NULL, text = NULL,
|
||||||
trees = NULL, ...)
|
trees = NULL, use_int_id = FALSE, ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{feature_names}{character vector of feature names. If the model already
|
\item{feature_names}{character vector of feature names. If the model already
|
||||||
@ -24,6 +24,9 @@ It could be useful, e.g., in multiclass classification to get only
|
|||||||
the trees of one certain class. IMPORTANT: the tree index in xgboost models
|
the trees of one certain class. IMPORTANT: the tree index in xgboost models
|
||||||
is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).}
|
is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).}
|
||||||
|
|
||||||
|
\item{use_int_id}{a logical flag indicating whether nodes in columns "Yes", "No", "Missing" should be
|
||||||
|
represented as integers (when FALSE) or as "Tree-Node" character strings (when FALSE).}
|
||||||
|
|
||||||
\item{...}{currently not used.}
|
\item{...}{currently not used.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
@ -32,9 +35,9 @@ A \code{data.table} with detailed information about model trees' nodes.
|
|||||||
The columns of the \code{data.table} are:
|
The columns of the \code{data.table} are:
|
||||||
|
|
||||||
\itemize{
|
\itemize{
|
||||||
\item \code{Tree}: ID of a tree in a model (integer)
|
\item \code{Tree}: integer ID of a tree in a model (zero-based index)
|
||||||
\item \code{Node}: integer ID of a node in a tree (integer)
|
\item \code{Node}: integer ID of a node in a tree (zero-based index)
|
||||||
\item \code{ID}: identifier of a node in a model (character)
|
\item \code{ID}: character identifier of a node in a model (only when \code{use_int_id=FALSE})
|
||||||
\item \code{Feature}: for a branch node, it's a feature id or name (when available);
|
\item \code{Feature}: for a branch node, it's a feature id or name (when available);
|
||||||
for a leaf note, it simply labels it as \code{'Leaf'}
|
for a leaf note, it simply labels it as \code{'Leaf'}
|
||||||
\item \code{Split}: location of the split for a branch node (split condition is always "less than")
|
\item \code{Split}: location of the split for a branch node (split condition is always "less than")
|
||||||
@ -44,7 +47,11 @@ The columns of the \code{data.table} are:
|
|||||||
\item \code{Quality}: either the split gain (change in loss) or the leaf value
|
\item \code{Quality}: either the split gain (change in loss) or the leaf value
|
||||||
\item \code{Cover}: metric related to the number of observation either seen by a split
|
\item \code{Cover}: metric related to the number of observation either seen by a split
|
||||||
or collected by a leaf during training.
|
or collected by a leaf during training.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
When \code{use_int_id=FALSE}, columns "Yes", "No", and "Missing" point to model-wide node identifiers
|
||||||
|
in the "ID" column. When \code{use_int_id=TRUE}, those columns point to node identifiers from
|
||||||
|
the corresponding trees in the "Node" column.
|
||||||
}
|
}
|
||||||
\description{
|
\description{
|
||||||
Parse a boosted tree model text dump into a \code{data.table} structure.
|
Parse a boosted tree model text dump into a \code{data.table} structure.
|
||||||
@ -58,8 +65,9 @@ bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_dep
|
|||||||
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||||
|
|
||||||
(dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
|
(dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst))
|
||||||
# This bst has feature_names stored in it, so those would be used when
|
|
||||||
# the feature_names parameter is not provided:
|
# This bst model already has feature_names stored with it, so those would be used when
|
||||||
|
# feature_names is not set:
|
||||||
(dt <- xgb.model.dt.tree(model = bst))
|
(dt <- xgb.model.dt.tree(model = bst))
|
||||||
|
|
||||||
# How to match feature names of splits that are following a current 'Yes' branch:
|
# How to match feature names of splits that are following a current 'Yes' branch:
|
||||||
|
|||||||
@ -24,7 +24,7 @@ IMPORTANT: the tree index in xgboost model is zero-based
|
|||||||
|
|
||||||
\item{render}{a logical flag for whether the graph should be rendered (see Value).}
|
\item{render}{a logical flag for whether the graph should be rendered (see Value).}
|
||||||
|
|
||||||
\item{show_node_id}{a logical flag for whether to include node id's in the graph.}
|
\item{show_node_id}{a logical flag for whether to show node id's in the graph.}
|
||||||
|
|
||||||
\item{...}{currently not used.}
|
\item{...}{currently not used.}
|
||||||
}
|
}
|
||||||
@ -68,9 +68,17 @@ data(agaricus.train, package='xgboost')
|
|||||||
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
|
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3,
|
||||||
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic")
|
||||||
# plot all the trees
|
# plot all the trees
|
||||||
xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst)
|
xgb.plot.tree(model = bst)
|
||||||
# plot only the first tree and include the node ID:
|
# plot only the first tree and display the node ID:
|
||||||
xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst,
|
xgb.plot.tree(model = bst, trees = 0, show_node_id = TRUE)
|
||||||
trees = 0, show_node_id = TRUE)
|
|
||||||
|
\dontrun{
|
||||||
|
# Below is an example of how to save this plot to a file.
|
||||||
|
# Note that for `export_graph` to work, the DiagrammeRsvg and rsvg packages must also be installed.
|
||||||
|
library(DiagrammeR)
|
||||||
|
gr <- xgb.plot.tree(model=bst, trees=0:1, render=FALSE)
|
||||||
|
export_graph(gr, 'tree.pdf', width=1500, height=1900)
|
||||||
|
export_graph(gr, 'tree.png', width=1500, height=1900)
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -12,7 +12,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL,
|
|||||||
|
|
||||||
xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
|
xgboost(data = NULL, label = NULL, missing = NA, weight = NULL,
|
||||||
params = list(), nrounds, verbose = 1, print_every_n = 1L,
|
params = list(), nrounds, verbose = 1, print_every_n = 1L,
|
||||||
early_stopping_rounds = NULL, maximize = NULL, save_period = 0,
|
early_stopping_rounds = NULL, maximize = NULL, save_period = NULL,
|
||||||
save_name = "xgboost.model", xgb_model = NULL, callbacks = list(), ...)
|
save_name = "xgboost.model", xgb_model = NULL, callbacks = list(), ...)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
|
|||||||
@ -68,12 +68,19 @@ SEXP XGDMatrixCreateFromMat_R(SEXP mat,
|
|||||||
SEXP dim = getAttrib(mat, R_DimSymbol);
|
SEXP dim = getAttrib(mat, R_DimSymbol);
|
||||||
size_t nrow = static_cast<size_t>(INTEGER(dim)[0]);
|
size_t nrow = static_cast<size_t>(INTEGER(dim)[0]);
|
||||||
size_t ncol = static_cast<size_t>(INTEGER(dim)[1]);
|
size_t ncol = static_cast<size_t>(INTEGER(dim)[1]);
|
||||||
double *din = REAL(mat);
|
const bool is_int = TYPEOF(mat) == INTSXP;
|
||||||
|
double *din;
|
||||||
|
int *iin;
|
||||||
|
if (is_int) {
|
||||||
|
iin = INTEGER(mat);
|
||||||
|
} else {
|
||||||
|
din = REAL(mat);
|
||||||
|
}
|
||||||
std::vector<float> data(nrow * ncol);
|
std::vector<float> data(nrow * ncol);
|
||||||
#pragma omp parallel for schedule(static)
|
#pragma omp parallel for schedule(static)
|
||||||
for (omp_ulong i = 0; i < nrow; ++i) {
|
for (omp_ulong i = 0; i < nrow; ++i) {
|
||||||
for (size_t j = 0; j < ncol; ++j) {
|
for (size_t j = 0; j < ncol; ++j) {
|
||||||
data[i * ncol +j] = din[i + nrow * j];
|
data[i * ncol +j] = is_int ? static_cast<float>(iin[i + nrow * j]) : din[i + nrow * j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DMatrixHandle handle;
|
DMatrixHandle handle;
|
||||||
|
|||||||
@ -189,3 +189,36 @@ test_that("xgb.cv works", {
|
|||||||
expect_false(is.null(cv$callbacks))
|
expect_false(is.null(cv$callbacks))
|
||||||
expect_false(is.null(cv$call))
|
expect_false(is.null(cv$call))
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test_that("train and predict with non-strict classes", {
|
||||||
|
# standard dense matrix input
|
||||||
|
train_dense <- as.matrix(train$data)
|
||||||
|
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
|
||||||
|
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
||||||
|
pr0 <- predict(bst, train_dense)
|
||||||
|
|
||||||
|
# dense matrix-like input of non-matrix class
|
||||||
|
class(train_dense) <- 'shmatrix'
|
||||||
|
expect_true(is.matrix(train_dense))
|
||||||
|
expect_error(
|
||||||
|
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
|
||||||
|
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
||||||
|
, regexp = NA)
|
||||||
|
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||||
|
expect_equal(pr0, pr)
|
||||||
|
|
||||||
|
# dense matrix-like input of non-matrix class with some inheritance
|
||||||
|
class(train_dense) <- c('pphmatrix','shmatrix')
|
||||||
|
expect_true(is.matrix(train_dense))
|
||||||
|
expect_error(
|
||||||
|
bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
|
||||||
|
eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
|
||||||
|
, regexp = NA)
|
||||||
|
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||||
|
expect_equal(pr0, pr)
|
||||||
|
|
||||||
|
# when someone inhertis from xgb.Booster, it should still be possible to use it as xgb.Booster
|
||||||
|
class(bst) <- c('super.Booster', 'xgb.Booster')
|
||||||
|
expect_error(pr <- predict(bst, train_dense), regexp = NA)
|
||||||
|
expect_equal(pr0, pr)
|
||||||
|
})
|
||||||
|
|||||||
@ -7,18 +7,30 @@ data(agaricus.test, package='xgboost')
|
|||||||
test_data <- agaricus.test$data[1:100,]
|
test_data <- agaricus.test$data[1:100,]
|
||||||
test_label <- agaricus.test$label[1:100]
|
test_label <- agaricus.test$label[1:100]
|
||||||
|
|
||||||
test_that("xgb.DMatrix: basic construction, saving, loading", {
|
test_that("xgb.DMatrix: basic construction", {
|
||||||
# from sparse matrix
|
# from sparse matrix
|
||||||
dtest1 <- xgb.DMatrix(test_data, label=test_label)
|
dtest1 <- xgb.DMatrix(test_data, label=test_label)
|
||||||
|
|
||||||
# from dense matrix
|
# from dense matrix
|
||||||
dtest2 <- xgb.DMatrix(as.matrix(test_data), label=test_label)
|
dtest2 <- xgb.DMatrix(as.matrix(test_data), label=test_label)
|
||||||
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label'))
|
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label'))
|
||||||
|
expect_equal(dim(dtest1), dim(dtest2))
|
||||||
|
|
||||||
|
#from dense integer matrix
|
||||||
|
int_data <- as.matrix(test_data)
|
||||||
|
storage.mode(int_data) <- "integer"
|
||||||
|
dtest3 <- xgb.DMatrix(int_data, label=test_label)
|
||||||
|
expect_equal(dim(dtest1), dim(dtest3))
|
||||||
|
})
|
||||||
|
|
||||||
|
test_that("xgb.DMatrix: saving, loading", {
|
||||||
# save to a local file
|
# save to a local file
|
||||||
|
dtest1 <- xgb.DMatrix(test_data, label=test_label)
|
||||||
tmp_file <- tempfile('xgb.DMatrix_')
|
tmp_file <- tempfile('xgb.DMatrix_')
|
||||||
expect_true(xgb.DMatrix.save(dtest1, tmp_file))
|
expect_true(xgb.DMatrix.save(dtest1, tmp_file))
|
||||||
# read from a local file
|
# read from a local file
|
||||||
dtest3 <- xgb.DMatrix(tmp_file)
|
expect_output(dtest3 <- xgb.DMatrix(tmp_file), "entries loaded from")
|
||||||
|
expect_output(dtest3 <- xgb.DMatrix(tmp_file, silent = TRUE), NA)
|
||||||
unlink(tmp_file)
|
unlink(tmp_file)
|
||||||
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label'))
|
expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label'))
|
||||||
|
|
||||||
@ -26,7 +38,7 @@ test_that("xgb.DMatrix: basic construction, saving, loading", {
|
|||||||
tmp <- c("0 1:1 2:1","1 3:1","0 1:1")
|
tmp <- c("0 1:1 2:1","1 3:1","0 1:1")
|
||||||
tmp_file <- 'tmp.libsvm'
|
tmp_file <- 'tmp.libsvm'
|
||||||
writeLines(tmp, tmp_file)
|
writeLines(tmp, tmp_file)
|
||||||
dtest4 <- xgb.DMatrix(tmp_file)
|
dtest4 <- xgb.DMatrix(tmp_file, silent = TRUE)
|
||||||
expect_equal(dim(dtest4), c(3, 4))
|
expect_equal(dim(dtest4), c(3, 4))
|
||||||
expect_equal(getinfo(dtest4, 'label'), c(0,1,0))
|
expect_equal(getinfo(dtest4, 'label'), c(0,1,0))
|
||||||
unlink(tmp_file)
|
unlink(tmp_file)
|
||||||
|
|||||||
@ -133,6 +133,12 @@ test_that("xgb.model.dt.tree works with and without feature names", {
|
|||||||
dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.x)
|
dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.x)
|
||||||
expect_output(str(dt.tree.x), 'Feature.*\\"3\\"')
|
expect_output(str(dt.tree.x), 'Feature.*\\"3\\"')
|
||||||
expect_equal(dt.tree[, -4, with=FALSE], dt.tree.x[, -4, with=FALSE])
|
expect_equal(dt.tree[, -4, with=FALSE], dt.tree.x[, -4, with=FALSE])
|
||||||
|
|
||||||
|
# using integer node ID instead of character
|
||||||
|
dt.tree.int <- xgb.model.dt.tree(model = bst.Tree, use_int_id = TRUE)
|
||||||
|
expect_equal(as.integer(tstrsplit(dt.tree$Yes, '-')[[2]]), dt.tree.int$Yes)
|
||||||
|
expect_equal(as.integer(tstrsplit(dt.tree$No, '-')[[2]]), dt.tree.int$No)
|
||||||
|
expect_equal(as.integer(tstrsplit(dt.tree$Missing, '-')[[2]]), dt.tree.int$Missing)
|
||||||
})
|
})
|
||||||
|
|
||||||
test_that("xgb.model.dt.tree throws error for gblinear", {
|
test_that("xgb.model.dt.tree throws error for gblinear", {
|
||||||
@ -169,6 +175,17 @@ test_that("xgb.importance works with GLM model", {
|
|||||||
xgb.ggplot.importance(importance.GLM)
|
xgb.ggplot.importance(importance.GLM)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
test_that("xgb.model.dt.tree and xgb.importance work with a single split model", {
|
||||||
|
bst1 <- xgboost(data = sparse_matrix, label = label, max_depth = 1,
|
||||||
|
eta = 1, nthread = 2, nrounds = 1, verbose = 0,
|
||||||
|
objective = "binary:logistic")
|
||||||
|
expect_error(dt <- xgb.model.dt.tree(model = bst1), regexp = NA) # no error
|
||||||
|
expect_equal(nrow(dt), 3)
|
||||||
|
expect_error(imp <- xgb.importance(model = bst1), regexp = NA) # no error
|
||||||
|
expect_equal(nrow(imp), 1)
|
||||||
|
expect_equal(imp$Gain, 1)
|
||||||
|
})
|
||||||
|
|
||||||
test_that("xgb.plot.tree works with and without feature names", {
|
test_that("xgb.plot.tree works with and without feature names", {
|
||||||
xgb.plot.tree(feature_names = feature.names, model = bst.Tree)
|
xgb.plot.tree(feature_names = feature.names, model = bst.Tree)
|
||||||
xgb.plot.tree(model = bst.Tree)
|
xgb.plot.tree(model = bst.Tree)
|
||||||
|
|||||||
@ -120,6 +120,7 @@ R Package
|
|||||||
make rcpplint
|
make rcpplint
|
||||||
```
|
```
|
||||||
- When needed, you can disable the linter warning of certain line with ```// NOLINT(*)``` comments.
|
- When needed, you can disable the linter warning of certain line with ```// NOLINT(*)``` comments.
|
||||||
|
- We use [roxygen](https://cran.r-project.org/web/packages/roxygen2/vignettes/roxygen2.html) for documenting the R package.
|
||||||
|
|
||||||
### Rmarkdown Vignettes
|
### Rmarkdown Vignettes
|
||||||
Rmarkdown vignettes are placed in [R-package/vignettes](../R-package/vignettes)
|
Rmarkdown vignettes are placed in [R-package/vignettes](../R-package/vignettes)
|
||||||
@ -143,3 +144,17 @@ make the-markdown-to-make.md
|
|||||||
make html
|
make html
|
||||||
```
|
```
|
||||||
The reason we do this is to avoid exploded repo size due to generated images sizes.
|
The reason we do this is to avoid exploded repo size due to generated images sizes.
|
||||||
|
|
||||||
|
### R package versioning
|
||||||
|
Since version 0.6.4.3, we have adopted a versioning system that uses an ```x.y.z``` (or ```core_major.core_minor.cran_release```)
|
||||||
|
format for CRAN releases and an ```x.y.z.p``` (or ```core_major.core_minor.cran_release.patch```) format for development patch versions.
|
||||||
|
This approach is similar to the one described in Yihui Xie's
|
||||||
|
[blog post on R Package Versioning](https://yihui.name/en/2013/06/r-package-versioning/),
|
||||||
|
except we need an additional field to accomodate the ```x.y``` core library version.
|
||||||
|
|
||||||
|
Each new CRAN release bumps up the 3rd field, while developments in-between CRAN releases
|
||||||
|
would be marked by an additional 4th field on the top of an existing CRAN release version.
|
||||||
|
Some additional consideration is needed when the core library version changes.
|
||||||
|
E.g., after the core changes from 0.6 to 0.7, the R package development version would become 0.7.0.1, working towards
|
||||||
|
a 0.7.1 CRAN release. The 0.7.0 would not be released to CRAN, unless it would require almost no additional development.
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user