[R] maintenance Apr 2017 (#2237)

* [R] make sure things work for a single split model; fixes #2191

* [R] add option use_int_id to xgb.model.dt.tree

* [R] add example of exporting tree plot to a file

* [R] set save_period = NULL as default in xgboost() to be the same as in xgb.train; fixes #2182

* [R] it's a good practice after CRAN releases to bump up package version in dev

* [R] allow xgb.DMatrix construction from integer dense matrices

* [R] xgb.DMatrix: silent parameter; improve documentation

* [R] xgb.model.dt.tree code style changes

* [R] update NEWS with parameter changes

* [R] code safety & style; handle non-strict matrix and inherited classes of input and model; fixes #2242

* [R] change to x.y.z.p R-package versioning scheme and set version to 0.6.4.3

* [R] add an R package versioning section to the contributors guide

* [R] R-package/README.md: clean up the redundant old installation instructions, link the contributors guide
This commit is contained in:
Vadim Khotilovich
2017-05-02 00:51:34 -05:00
committed by Tong He
parent d769b6bcb5
commit a375ad2822
29 changed files with 351 additions and 246 deletions

View File

@@ -47,7 +47,7 @@ check.booster.params <- function(params, ...) {
multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric')
if (length(multi_names) > 0) {
warning("The following parameters were provided multiple times:\n\t",
paste(multi_names, collapse=', '), "\n Only the last value for each of them will be used.\n")
paste(multi_names, collapse = ', '), "\n Only the last value for each of them will be used.\n")
# While xgboost internals would choose the last value for a multiple-times parameter,
# enforce it here in R as well (b/c multi-parameters might be used further in R code,
# and R takes the 1st value when multiple elements with the same name are present in a list).
@@ -120,22 +120,22 @@ check.custom.eval <- function(env = parent.frame()) {
}
# Update booster with dtrain for an iteration
xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
if (class(booster) != "xgb.Booster.handle") {
stop("first argument type must be xgb.Booster.handle")
# Update a booster handle for an iteration with dtrain data
xgb.iter.update <- function(booster_handle, dtrain, iter, obj = NULL) {
if (!identical(class(booster_handle), "xgb.Booster.handle")) {
stop("booster_handle must be of xgb.Booster.handle class")
}
if (class(dtrain) != "xgb.DMatrix") {
stop("second argument type must be xgb.DMatrix")
if (!inherits(dtrain, "xgb.DMatrix")) {
stop("dtrain must be of xgb.DMatrix class")
}
if (is.null(obj)) {
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
.Call("XGBoosterUpdateOneIter_R", booster_handle, as.integer(iter), dtrain,
PACKAGE = "xgboost")
} else {
pred <- predict(booster, dtrain)
pred <- predict(booster_handle, dtrain)
gpair <- obj(pred, dtrain)
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE = "xgboost")
.Call("XGBoosterBoostOneIter_R", booster_handle, dtrain, gpair$grad, gpair$hess, PACKAGE = "xgboost")
}
return(TRUE)
}
@@ -144,16 +144,16 @@ xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
# Evaluate one iteration.
# Returns a named vector of evaluation metrics
# with the names in a 'datasetname-metricname' format.
xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
if (class(booster) != "xgb.Booster.handle")
stop("first argument type must be xgb.Booster.handle")
xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) {
if (!identical(class(booster_handle), "xgb.Booster.handle"))
stop("class of booster_handle must be xgb.Booster.handle")
if (length(watchlist) == 0)
return(NULL)
evnames <- names(watchlist)
if (is.null(feval)) {
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
msg <- .Call("XGBoosterEvalOneIter_R", booster_handle, as.integer(iter), watchlist,
as.list(evnames), PACKAGE = "xgboost")
msg <- stri_split_regex(msg, '(\\s+|:|\\s+)')[[1]][-1]
res <- as.numeric(msg[c(FALSE,TRUE)]) # even indices are the values
@@ -161,7 +161,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
} else {
res <- sapply(seq_along(watchlist), function(j) {
w <- watchlist[[j]]
preds <- predict(booster, w) # predict using all trees
preds <- predict(booster_handle, w) # predict using all trees
eval_res <- feval(preds, w)
out <- eval_res$value
names(out) <- paste0(evnames[j], "-", eval_res$metric)
@@ -180,7 +180,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) {
generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
# cannot do it for rank
if (exists('objective', where=params) &&
if (exists('objective', where = params) &&
is.character(params$objective) &&
strtrim(params$objective, 5) == 'rank:') {
stop("\n\tAutomatic generation of CV-folds is not implemented for ranking!\n",
@@ -195,7 +195,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
# - For classification, need to convert y labels to factor before making the folds,
# and then do stratification by factor levels.
# - For regression, leave y numeric and do stratification by quantiles.
if (exists('objective', where=params) &&
if (exists('objective', where = params) &&
is.character(params$objective)) {
# If 'objective' provided in params, assume that y is a classification label
# unless objective is reg:linear
@@ -306,7 +306,7 @@ depr_par_lut <- matrix(c(
'plot.width','plot_width',
'n_first_tree', 'trees',
'dummy', 'DUMMY'
), ncol=2, byrow = TRUE)
), ncol = 2, byrow = TRUE)
colnames(depr_par_lut) <- c('old', 'new')
# Checks the dot-parameters for deprecated names
@@ -331,7 +331,7 @@ check.deprecation <- function(..., env = parent.frame()) {
if (!ex_match[i]) {
warning("'", pars_par, "' was partially matched to '", old_par,"'")
}
.Deprecated(new_par, old=old_par, package = 'xgboost')
.Deprecated(new_par, old = old_par, package = 'xgboost')
if (new_par != 'NULL') {
eval(parse(text = paste(new_par, '<-', pars[[pars_par]])), envir = env)
}