From a375ad28221791b1f63863c09ced42e1ab44f148 Mon Sep 17 00:00:00 2001 From: Vadim Khotilovich Date: Tue, 2 May 2017 00:51:34 -0500 Subject: [PATCH] [R] maintenance Apr 2017 (#2237) * [R] make sure things work for a single split model; fixes #2191 * [R] add option use_int_id to xgb.model.dt.tree * [R] add example of exporting tree plot to a file * [R] set save_period = NULL as default in xgboost() to be the same as in xgb.train; fixes #2182 * [R] it's a good practice after CRAN releases to bump up package version in dev * [R] allow xgb.DMatrix construction from integer dense matrices * [R] xgb.DMatrix: silent parameter; improve documentation * [R] xgb.model.dt.tree code style changes * [R] update NEWS with parameter changes * [R] code safety & style; handle non-strict matrix and inherited classes of input and model; fixes #2242 * [R] change to x.y.z.p R-package versioning scheme and set version to 0.6.4.3 * [R] add an R package versioning section to the contributors guide * [R] R-package/README.md: clean up the redundant old installation instructions, link the contributors guide --- NEWS.md | 5 ++ R-package/DESCRIPTION | 2 +- R-package/R/callbacks.R | 18 +++--- R-package/R/utils.R | 40 ++++++------ R-package/R/xgb.Booster.R | 55 ++++++++-------- R-package/R/xgb.DMatrix.R | 52 +++++++-------- R-package/R/xgb.DMatrix.save.R | 6 +- R-package/R/xgb.cv.R | 24 +++---- R-package/R/xgb.dump.R | 16 ++--- R-package/R/xgb.importance.R | 8 ++- R-package/R/xgb.model.dt.tree.R | 84 +++++++++++++++---------- R-package/R/xgb.plot.deepness.R | 16 ++--- R-package/R/xgb.plot.importance.R | 10 +-- R-package/R/xgb.plot.tree.R | 22 ++++--- R-package/R/xgb.save.R | 9 +-- R-package/R/xgb.train.R | 12 ++-- R-package/R/xgboost.R | 4 +- R-package/README.md | 48 ++------------ R-package/man/xgb.Booster.complete.Rd | 5 +- R-package/man/xgb.DMatrix.Rd | 23 ++++--- R-package/man/xgb.dump.Rd | 2 +- R-package/man/xgb.model.dt.tree.Rd | 22 ++++--- R-package/man/xgb.plot.tree.Rd | 18 ++++-- R-package/man/xgb.train.Rd | 2 +- R-package/src/xgboost_R.cc | 11 +++- R-package/tests/testthat/test_basic.R | 33 ++++++++++ R-package/tests/testthat/test_dmatrix.R | 18 +++++- R-package/tests/testthat/test_helpers.R | 17 +++++ doc/how_to/contribute.md | 15 +++++ 29 files changed, 351 insertions(+), 246 deletions(-) diff --git a/NEWS.md b/NEWS.md index 48c382169..61da8a36b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,11 @@ This file records the changes in xgboost library in reverse chronological order. - Thread local variable is upgraded so it is automatically freed at thread exit. * Migrate to C++11 - The current master version now requires C++11 enabled compiled(g++4.8 or higher) +* R package: + - New parameters: + - `silent` in `xgb.DMatrix()` + - `use_int_id` in `xgb.model.dt.tree()` + - Default value of the `save_period` parameter in `xgboost()` changed to NULL (consistent with `xgb.train()`). 
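As a quick illustration of the R parameter changes listed above, here is a minimal sketch (not part of the patch itself); it assumes a build of the package that includes this PR and uses the bundled agaricus demo data, with the temporary file name purely illustrative:

```r
library(xgboost)
data(agaricus.train, package = 'xgboost')

# silent = TRUE suppresses the informational message that is otherwise
# printed when an xgb.DMatrix is loaded back from a file
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data')
dtrain <- xgb.DMatrix('xgb.DMatrix.data', silent = TRUE)
unlink('xgb.DMatrix.data')

# save_period now defaults to NULL in xgboost(), matching xgb.train():
# no model file is written during training unless save_period is set
bst <- xgboost(data = dtrain, nrounds = 2, max_depth = 2, eta = 1,
               objective = "binary:logistic", verbose = 0)

# use_int_id = TRUE makes the Yes/No/Missing columns integer node ids
# within each tree instead of "Tree-Node" character ids
dt <- xgb.model.dt.tree(model = bst, use_int_id = TRUE)
```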
## v0.6 (2016.07.29) * Version 0.5 is skipped due to major improvements in the core diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 1db99e49a..637e3435d 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,7 +1,7 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 0.6-4 +Version: 0.6.4.3 Date: 2017-01-04 Author: Tianqi Chen , Tong He , Michael Benesty , Vadim Khotilovich , diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 9de59b6b6..0ce732e8a 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -57,7 +57,7 @@ NULL #' \code{\link{callbacks}} #' #' @export -cb.print.evaluation <- function(period=1, showsd=TRUE) { +cb.print.evaluation <- function(period = 1, showsd = TRUE) { callback <- function(env = parent.frame()) { if (length(env$bst_evaluation) == 0 || @@ -132,7 +132,7 @@ cb.evaluation.log <- function() { cnames <- numeric(len) cnames[c(TRUE, FALSE)] <- means cnames[c(FALSE, TRUE)] <- stds - env$evaluation_log <- env$evaluation_log[, c('iter', cnames), with=FALSE] + env$evaluation_log <- env$evaluation_log[, c('iter', cnames), with = FALSE] } } @@ -290,8 +290,8 @@ cb.reset.parameters <- function(new_params) { #' \code{\link{xgb.attr}} #' #' @export -cb.early.stop <- function(stopping_rounds, maximize=FALSE, - metric_name=NULL, verbose=TRUE) { +cb.early.stop <- function(stopping_rounds, maximize = FALSE, + metric_name = NULL, verbose = TRUE) { # state variables best_iteration <- -1 best_ntreelimit <- -1 @@ -308,7 +308,7 @@ cb.early.stop <- function(stopping_rounds, maximize=FALSE, metric_idx <<- which(gsub('-', '_', metric_name) == eval_names) if (length(metric_idx) == 0) stop("'metric_name' for early stopping is not one of the following:\n", - paste(eval_names, collapse=' '), '\n') + paste(eval_names, collapse = ' '), '\n') } if (is.null(metric_name) && length(env$bst_evaluation) > 1) { @@ -334,7 +334,7 @@ cb.early.stop <- function(stopping_rounds, maximize=FALSE, env$stop_condition <- FALSE if (!is.null(env$bst)) { - if (class(env$bst) != 'xgb.Booster') + if (!inherits(env$bst, 'xgb.Booster')) stop("'bst' in the parent frame must be an 'xgb.Booster'") if (!is.null(best_score <- xgb.attr(env$bst$handle, 'best_score'))) { best_score <<- as.numeric(best_score) @@ -529,7 +529,7 @@ cb.cv.predict <- function(save_models = FALSE) { # # Format the evaluation metric string -format.eval.string <- function(iter, eval_res, eval_err=NULL) { +format.eval.string <- function(iter, eval_res, eval_err = NULL) { if (length(eval_res) == 0) stop('no evaluation results') enames <- names(eval_res) @@ -539,9 +539,9 @@ format.eval.string <- function(iter, eval_res, eval_err=NULL) { if (!is.null(eval_err)) { if (length(eval_res) != length(eval_err)) stop('eval_res & eval_err lengths mismatch') - res <- paste0(sprintf("%s:%f+%f", enames, eval_res, eval_err), collapse='\t') + res <- paste0(sprintf("%s:%f+%f", enames, eval_res, eval_err), collapse = '\t') } else { - res <- paste0(sprintf("%s:%f", enames, eval_res), collapse='\t') + res <- paste0(sprintf("%s:%f", enames, eval_res), collapse = '\t') } return(paste0(iter, res)) } diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 1ae822eed..f66f7cf99 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -47,7 +47,7 @@ check.booster.params <- function(params, ...) 
{ multi_names <- setdiff(names(name_freqs[name_freqs > 1]), 'eval_metric') if (length(multi_names) > 0) { warning("The following parameters were provided multiple times:\n\t", - paste(multi_names, collapse=', '), "\n Only the last value for each of them will be used.\n") + paste(multi_names, collapse = ', '), "\n Only the last value for each of them will be used.\n") # While xgboost internals would choose the last value for a multiple-times parameter, # enforce it here in R as well (b/c multi-parameters might be used further in R code, # and R takes the 1st value when multiple elements with the same name are present in a list). @@ -120,22 +120,22 @@ check.custom.eval <- function(env = parent.frame()) { } -# Update booster with dtrain for an iteration -xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) { - if (class(booster) != "xgb.Booster.handle") { - stop("first argument type must be xgb.Booster.handle") +# Update a booster handle for an iteration with dtrain data +xgb.iter.update <- function(booster_handle, dtrain, iter, obj = NULL) { + if (!identical(class(booster_handle), "xgb.Booster.handle")) { + stop("booster_handle must be of xgb.Booster.handle class") } - if (class(dtrain) != "xgb.DMatrix") { - stop("second argument type must be xgb.DMatrix") + if (!inherits(dtrain, "xgb.DMatrix")) { + stop("dtrain must be of xgb.DMatrix class") } if (is.null(obj)) { - .Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain, + .Call("XGBoosterUpdateOneIter_R", booster_handle, as.integer(iter), dtrain, PACKAGE = "xgboost") } else { - pred <- predict(booster, dtrain) + pred <- predict(booster_handle, dtrain) gpair <- obj(pred, dtrain) - .Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE = "xgboost") + .Call("XGBoosterBoostOneIter_R", booster_handle, dtrain, gpair$grad, gpair$hess, PACKAGE = "xgboost") } return(TRUE) } @@ -144,16 +144,16 @@ xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) { # Evaluate one iteration. # Returns a named vector of evaluation metrics # with the names in a 'datasetname-metricname' format. 
-xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) { - if (class(booster) != "xgb.Booster.handle") - stop("first argument type must be xgb.Booster.handle") - +xgb.iter.eval <- function(booster_handle, watchlist, iter, feval = NULL) { + if (!identical(class(booster_handle), "xgb.Booster.handle")) + stop("class of booster_handle must be xgb.Booster.handle") + if (length(watchlist) == 0) return(NULL) evnames <- names(watchlist) if (is.null(feval)) { - msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist, + msg <- .Call("XGBoosterEvalOneIter_R", booster_handle, as.integer(iter), watchlist, as.list(evnames), PACKAGE = "xgboost") msg <- stri_split_regex(msg, '(\\s+|:|\\s+)')[[1]][-1] res <- as.numeric(msg[c(FALSE,TRUE)]) # even indices are the values @@ -161,7 +161,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) { } else { res <- sapply(seq_along(watchlist), function(j) { w <- watchlist[[j]] - preds <- predict(booster, w) # predict using all trees + preds <- predict(booster_handle, w) # predict using all trees eval_res <- feval(preds, w) out <- eval_res$value names(out) <- paste0(evnames[j], "-", eval_res$metric) @@ -180,7 +180,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL) { generate.cv.folds <- function(nfold, nrows, stratified, label, params) { # cannot do it for rank - if (exists('objective', where=params) && + if (exists('objective', where = params) && is.character(params$objective) && strtrim(params$objective, 5) == 'rank:') { stop("\n\tAutomatic generation of CV-folds is not implemented for ranking!\n", @@ -195,7 +195,7 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) { # - For classification, need to convert y labels to factor before making the folds, # and then do stratification by factor levels. # - For regression, leave y numeric and do stratification by quantiles. - if (exists('objective', where=params) && + if (exists('objective', where = params) && is.character(params$objective)) { # If 'objective' provided in params, assume that y is a classification label # unless objective is reg:linear @@ -306,7 +306,7 @@ depr_par_lut <- matrix(c( 'plot.width','plot_width', 'n_first_tree', 'trees', 'dummy', 'DUMMY' -), ncol=2, byrow = TRUE) +), ncol = 2, byrow = TRUE) colnames(depr_par_lut) <- c('old', 'new') # Checks the dot-parameters for deprecated names @@ -331,7 +331,7 @@ check.deprecation <- function(..., env = parent.frame()) { if (!ex_match[i]) { warning("'", pars_par, "' was partially matched to '", old_par,"'") } - .Deprecated(new_par, old=old_par, package = 'xgboost') + .Deprecated(new_par, old = old_par, package = 'xgboost') if (new_par != 'NULL') { eval(parse(text = paste(new_par, '<-', pars[[pars_par]])), envir = env) } diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 8aafee1f3..2b2d2f241 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -1,19 +1,19 @@ -# Construct an internal xgboost Booster and return a handle to it +# Construct an internal xgboost Booster and return a handle to it. 
# internal utility function xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = NULL) { if (typeof(cachelist) != "list" || - any(sapply(cachelist, class) != 'xgb.DMatrix')) { - stop("xgb.Booster only accepts list of DMatrix as cachelist") + !all(sapply(cachelist, inherits, 'xgb.DMatrix'))) { + stop("cachelist must be a list of xgb.DMatrix objects") } handle <- .Call("XGBoosterCreate_R", cachelist, PACKAGE = "xgboost") if (!is.null(modelfile)) { if (typeof(modelfile) == "character") { - .Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost") + .Call("XGBoosterLoadModel_R", handle, modelfile[1], PACKAGE = "xgboost") } else if (typeof(modelfile) == "raw") { .Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost") - } else if (class(modelfile) == "xgb.Booster") { - bst <- xgb.Booster.complete(modelfile, saveraw=TRUE) + } else if (inherits(modelfile, "xgb.Booster")) { + bst <- xgb.Booster.complete(modelfile, saveraw = TRUE) .Call("XGBoosterLoadModelFromRaw_R", handle, bst$raw, PACKAGE = "xgboost") } else { stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object") @@ -37,10 +37,10 @@ xgb.handleToBooster <- function(handle, raw = NULL) { # Check whether xgb.Booster.handle is null # internal utility function is.null.handle <- function(handle) { - if (class(handle) != "xgb.Booster.handle") + if (!identical(class(handle), "xgb.Booster.handle")) stop("argument type must be xgb.Booster.handle") - if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE="xgboost")) + if (is.null(handle) || .Call("XGCheckNullPtr_R", handle, PACKAGE = "xgboost")) return(TRUE) return(FALSE) } @@ -78,8 +78,8 @@ xgb.get.handle <- function(object) { #' its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods #' should still work for such a model object since those methods would be using #' \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the -#' \code{xgb.Booster.complete} function once after loading a model as an R-object. That which would -#' prevent further reconstruction (potentially, multiple times) of an internal booster model. +#' \code{xgb.Booster.complete} function explicitely once after loading a model as an R-object. +#' That would prevent further repeated implicit reconstruction of an internal booster model. #' #' @return #' An object of \code{xgb.Booster} class. @@ -94,13 +94,14 @@ xgb.get.handle <- function(object) { #' bst1 <- readRDS("xgb.model.rds") #' # the handle is invalid: #' print(bst1$handle) +#' #' bst1 <- xgb.Booster.complete(bst1) #' # now the handle points to a valid internal booster model: #' print(bst1$handle) #' #' @export xgb.Booster.complete <- function(object, saveraw = TRUE) { - if (class(object) != "xgb.Booster") + if (!inherits(object, "xgb.Booster")) stop("argument type must be xgb.Booster") if (is.null.handle(object$handle)) { @@ -225,7 +226,7 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE, reshape = FALSE, ...) { object <- xgb.Booster.complete(object, saveraw = FALSE) - if (class(newdata) != "xgb.DMatrix") + if (!inherits(newdata, "xgb.DMatrix")) newdata <- xgb.DMatrix(newdata, missing = missing) if (is.null(ntreelimit)) ntreelimit <- NVL(object$best_ntreelimit, 0) @@ -337,7 +338,7 @@ predict.xgb.Booster.handle <- function(object, ...) 
{ xgb.attr <- function(object, name) { if (is.null(name) || nchar(as.character(name[1])) == 0) stop("invalid attribute name") handle <- xgb.get.handle(object) - .Call("XGBoosterGetAttr_R", handle, as.character(name[1]), PACKAGE="xgboost") + .Call("XGBoosterGetAttr_R", handle, as.character(name[1]), PACKAGE = "xgboost") } #' @rdname xgb.attr @@ -354,7 +355,7 @@ xgb.attr <- function(object, name) { value <- as.character(value[1]) } } - .Call("XGBoosterSetAttr_R", handle, as.character(name[1]), value, PACKAGE="xgboost") + .Call("XGBoosterSetAttr_R", handle, as.character(name[1]), value, PACKAGE = "xgboost") if (is(object, 'xgb.Booster') && !is.null(object$raw)) { object$raw <- xgb.save.raw(object$handle) } @@ -365,10 +366,10 @@ xgb.attr <- function(object, name) { #' @export xgb.attributes <- function(object) { handle <- xgb.get.handle(object) - attr_names <- .Call("XGBoosterGetAttrNames_R", handle, PACKAGE="xgboost") + attr_names <- .Call("XGBoosterGetAttrNames_R", handle, PACKAGE = "xgboost") if (is.null(attr_names)) return(NULL) res <- lapply(attr_names, function(x) { - .Call("XGBoosterGetAttr_R", handle, x, PACKAGE="xgboost") + .Call("XGBoosterGetAttr_R", handle, x, PACKAGE = "xgboost") }) names(res) <- attr_names res @@ -393,7 +394,7 @@ xgb.attributes <- function(object) { }) handle <- xgb.get.handle(object) for (i in seq_along(a)) { - .Call("XGBoosterSetAttr_R", handle, names(a[i]), a[[i]], PACKAGE="xgboost") + .Call("XGBoosterSetAttr_R", handle, names(a[i]), a[[i]], PACKAGE = "xgboost") } if (is(object, 'xgb.Booster') && !is.null(object$raw)) { object$raw <- xgb.save.raw(object$handle) @@ -442,8 +443,8 @@ xgb.attributes <- function(object) { object } -# Extract # of trees in a model -# TODO: either add a getter to C-interface, or simply set an 'ntree' attribute after each iteration +# Extract the number of trees in a model. +# TODO: either add a getter to C-interface, or simply set an 'ntree' attribute after each iteration. # internal utility function xgb.ntree <- function(bst) { length(grep('^booster', xgb.dump(bst))) @@ -470,7 +471,7 @@ xgb.ntree <- function(bst) { #' #' @method print xgb.Booster #' @export -print.xgb.Booster <- function(x, verbose=FALSE, ...) { +print.xgb.Booster <- function(x, verbose = FALSE, ...) { cat('##### xgb.Booster\n') valid_handle <- is.null.handle(x$handle) @@ -479,7 +480,7 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) { cat('raw: ') if (!is.null(x$raw)) { - cat(format(object.size(x$raw), units="auto"), '\n') + cat(format(object.size(x$raw), units = "auto"), '\n') } else { cat('NULL\n') } @@ -493,7 +494,7 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) { cat( ' ', paste(names(x$params), paste0('"', unlist(x$params), '"'), - sep=' = ', collapse=', '), '\n', sep='') + sep = ' = ', collapse = ', '), '\n', sep = '') } # TODO: need an interface to access all the xgboosts parameters @@ -505,9 +506,9 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) { if (verbose) { cat( paste(paste0(' ',names(attrs)), paste0('"', unlist(attrs), '"'), - sep=' = ', collapse='\n'), '\n', sep='') + sep = ' = ', collapse = '\n'), '\n', sep = '') } else { - cat(' ', paste(names(attrs), collapse=', '), '\n', sep='') + cat(' ', paste(names(attrs), collapse = ', '), '\n', sep = '') } } @@ -522,16 +523,16 @@ print.xgb.Booster <- function(x, verbose=FALSE, ...) 
{ if (!is.null(x$feature_names)) cat('# of features:', length(x$feature_names), '\n') - cat('niter: ', x$niter, '\n', sep='') + cat('niter: ', x$niter, '\n', sep = '') # TODO: uncomment when faster xgb.ntree is implemented #cat('ntree: ', xgb.ntree(x), '\n', sep='') for (n in setdiff(names(x), c('handle', 'raw', 'call', 'params', 'callbacks', 'evaluation_log','niter','feature_names'))) { if (is.atomic(x[[n]])) { - cat(n, ':', x[[n]], '\n', sep=' ') + cat(n, ':', x[[n]], '\n', sep = ' ') } else { - cat(n, ':\n\t', sep=' ') + cat(n, ':\n\t', sep = ' ') print(x[[n]]) } } diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index 44114eab5..72b973e0d 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -1,14 +1,17 @@ #' Construct xgb.DMatrix object #' -#' Contruct xgb.DMatrix object from dense matrix, sparse matrix -#' or local file (that was created previously by saving an \code{xgb.DMatrix}). +#' Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file. +#' Supported input file formats are either a libsvm text file or a binary file that was created previously by +#' \code{\link{xgb.DMatrix.save}}). #' -#' @param data a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename -#' @param info a list of information of the xgb.DMatrix object -#' @param missing Missing is only used when input is dense matrix, pick a float -#' value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values. -# -#' @param ... other information to pass to \code{info}. +#' @param data a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character +#' string representing a filename. +#' @param info a named list of additional information to store in the \code{xgb.DMatrix} object. +#' See \code{\link{setinfo}} for the specific allowed kinds of +#' @param missing a float value to represents missing values in data (used only when input is a dense matrix). +#' It is useful when a 0 or some other extreme value represents missing values in data. +#' @param silent whether to suppress printing an informational message after loading from a file. +#' @param ... the \code{info} data could be passed directly as parameters, without creating an \code{info} list. #' #' @examples #' data(agaricus.train, package='xgboost') @@ -17,19 +20,19 @@ #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') #' dtrain <- xgb.DMatrix('xgb.DMatrix.data') #' @export -xgb.DMatrix <- function(data, info = list(), missing = NA, ...) { +xgb.DMatrix <- function(data, info = list(), missing = NA, silent = FALSE, ...) { cnames <- NULL if (typeof(data) == "character") { if (length(data) > 1) stop("'data' has class 'character' and length ", length(data), ".\n 'data' accepts either a numeric matrix or a single filename.") - handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE), + handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(silent), PACKAGE = "xgboost") } else if (is.matrix(data)) { handle <- .Call("XGDMatrixCreateFromMat_R", data, missing, PACKAGE = "xgboost") cnames <- colnames(data) - } else if (class(data) == "dgCMatrix") { + } else if (inherits(data, "dgCMatrix")) { handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x, nrow(data), PACKAGE = "xgboost") cnames <- colnames(data) @@ -51,10 +54,9 @@ xgb.DMatrix <- function(data, info = list(), missing = NA, ...) 
{ # get dmatrix from data, label # internal helper method xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) { - inClass <- class(data) - if ("dgCMatrix" %in% inClass || "matrix" %in% inClass ) { + if (inherits(data, "dgCMatrix") || is.matrix(data)) { if (is.null(label)) { - stop("xgboost: need label when data is a matrix") + stop("label must be provided when data is a matrix") } dtrain <- xgb.DMatrix(data, label = label, missing = missing) if (!is.null(weight)){ @@ -64,11 +66,11 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) { if (!is.null(label)) { warning("xgboost: label will be ignored.") } - if (inClass == "character") { - dtrain <- xgb.DMatrix(data) - } else if (inClass == "xgb.DMatrix") { + if (is.character(data)) { + dtrain <- xgb.DMatrix(data[1]) + } else if (inherits(data, "xgb.DMatrix")) { dtrain <- data - } else if ("data.frame" %in% inClass) { + } else if (inherits(data, "data.frame")) { stop("xgboost doesn't support data.frame as input. Convert it to matrix first.") } else { stop("xgboost: invalid input data") @@ -98,8 +100,8 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) { #' #' @export dim.xgb.DMatrix <- function(x) { - c(.Call("XGDMatrixNumRow_R", x, PACKAGE="xgboost"), - .Call("XGDMatrixNumCol_R", x, PACKAGE="xgboost")) + c(.Call("XGDMatrixNumRow_R", x, PACKAGE = "xgboost"), + .Call("XGDMatrixNumCol_R", x, PACKAGE = "xgboost")) } @@ -297,8 +299,8 @@ slice <- function(object, ...) UseMethod("slice") #' @rdname slice.xgb.DMatrix #' @export slice.xgb.DMatrix <- function(object, idxset, ...) { - if (class(object) != "xgb.DMatrix") { - stop("slice: first argument dtrain must be xgb.DMatrix") + if (!inherits(object, "xgb.DMatrix")) { + stop("object must be xgb.DMatrix") } ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset, PACKAGE = "xgboost") @@ -317,7 +319,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) { #' @rdname slice.xgb.DMatrix #' @export -`[.xgb.DMatrix` <- function(object, idxset, colset=NULL) { +`[.xgb.DMatrix` <- function(object, idxset, colset = NULL) { slice(object, idxset) } @@ -341,7 +343,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) { #' #' @method print xgb.DMatrix #' @export -print.xgb.DMatrix <- function(x, verbose=FALSE, ...) { +print.xgb.DMatrix <- function(x, verbose = FALSE, ...) { cat('xgb.DMatrix dim:', nrow(x), 'x', ncol(x), ' info: ') infos <- c() if(length(getinfo(x, 'label')) > 0) infos <- 'label' @@ -353,7 +355,7 @@ print.xgb.DMatrix <- function(x, verbose=FALSE, ...) 
{ cat(' colnames:') if (verbose & !is.null(cnames)) { cat("\n'") - cat(cnames, sep="','") + cat(cnames, sep = "','") cat("'") } else { if (is.null(cnames)) cat(' no') diff --git a/R-package/R/xgb.DMatrix.save.R b/R-package/R/xgb.DMatrix.save.R index 9ceec801a..4ba8c498c 100644 --- a/R-package/R/xgb.DMatrix.save.R +++ b/R-package/R/xgb.DMatrix.save.R @@ -15,9 +15,9 @@ xgb.DMatrix.save <- function(dmatrix, fname) { if (typeof(fname) != "character") stop("fname must be character") - if (class(dmatrix) != "xgb.DMatrix") - stop("the input data must be xgb.DMatrix") + if (!inherits(dmatrix, "xgb.DMatrix")) + stop("dmatrix must be xgb.DMatrix") - .Call("XGDMatrixSaveBinary_R", dmatrix, fname, 0L, PACKAGE = "xgboost") + .Call("XGDMatrixSaveBinary_R", dmatrix, fname[1], 0L, PACKAGE = "xgboost") return(TRUE) } diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index f576b2430..68d50e0e3 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -130,13 +130,13 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = # stop("Either 'eval_metric' or 'feval' must be provided for CV") # Check the labels - if ( (class(data) == 'xgb.DMatrix' && is.null(getinfo(data, 'label'))) || - (class(data) != 'xgb.DMatrix' && is.null(label))) + if ( (inherits(data, 'xgb.DMatrix') && is.null(getinfo(data, 'label'))) || + (!inherits(data, 'xgb.DMatrix') && is.null(label))) stop("Labels must be provided for CV either through xgb.DMatrix, or through 'label=' when 'data' is matrix") # CV folds if(!is.null(folds)) { - if(class(folds) != "list" || length(folds) < 2) + if(!is.list(folds) || length(folds) < 2) stop("'folds' must be a list with 2 or more elements that are vectors of indices for each CV-fold") nfold <- length(folds) } else { @@ -153,7 +153,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = params <- c(params, list(silent = 1)) print_every_n <- max( as.integer(print_every_n), 1L) if (!has.callbacks(callbacks, 'cb.print.evaluation') && verbose) { - callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n, showsd=showsd)) + callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n, showsd = showsd)) } # evaluation log callback: always is on in CV evaluation_log <- list() @@ -165,12 +165,12 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = if (!is.null(early_stopping_rounds) && !has.callbacks(callbacks, 'cb.early.stop')) { callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds, - maximize=maximize, verbose=verbose)) + maximize = maximize, verbose = verbose)) } # CV-predictions callback if (prediction && !has.callbacks(callbacks, 'cb.cv.predict')) { - callbacks <- add.cb(callbacks, cb.cv.predict(save_models=FALSE)) + callbacks <- add.cb(callbacks, cb.cv.predict(save_models = FALSE)) } # Sort the callbacks into categories cb <- categorize.callbacks(callbacks) @@ -182,7 +182,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = dtest <- slice(dall, folds[[k]]) dtrain <- slice(dall, unlist(folds[-k])) handle <- xgb.Booster.handle(params, list(dtrain, dtest)) - list(dtrain=dtrain, bst=handle, watchlist=list(train=dtrain, test=dtest), index=folds[[k]]) + list(dtrain = dtrain, bst = handle, watchlist = list(train = dtrain, test=dtest), index = folds[[k]]) }) # a "basket" to collect some results from callbacks basket <- list() @@ -212,7 +212,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = if (stop_condition) break } - for (f in 
cb$finalize) f(finalize=TRUE) + for (f in cb$finalize) f(finalize = TRUE) # the CV result ret <- list( @@ -254,8 +254,8 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = #' @rdname print.xgb.cv #' @method print xgb.cv.synchronous #' @export -print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) { - cat('##### xgb.cv ', length(x$folds), '-folds\n', sep='') +print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) { + cat('##### xgb.cv ', length(x$folds), '-folds\n', sep = '') if (verbose) { if (!is.null(x$call)) { @@ -267,7 +267,7 @@ print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) { cat( ' ', paste(names(x$params), paste0('"', unlist(x$params), '"'), - sep=' = ', collapse=', '), '\n', sep='') + sep = ' = ', collapse = ', '), '\n', sep = '') } if (!is.null(x$callbacks) && length(x$callbacks) > 0) { cat('callbacks:\n') @@ -280,7 +280,7 @@ print.xgb.cv.synchronous <- function(x, verbose=FALSE, ...) { for (n in c('niter', 'best_iteration', 'best_ntreelimit')) { if (is.null(x[[n]])) next - cat(n, ': ', x[[n]], '\n', sep='') + cat(n, ': ', x[[n]], '\n', sep = '') } if (!is.null(x$pred)) { diff --git a/R-package/R/xgb.dump.R b/R-package/R/xgb.dump.R index f05961221..b9c857012 100644 --- a/R-package/R/xgb.dump.R +++ b/R-package/R/xgb.dump.R @@ -39,19 +39,19 @@ #' cat(xgb.dump(bst, with_stats = TRUE, dump_format='json')) #' #' @export -xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE, +xgb.dump <- function(model, fname = NULL, fmap = "", with_stats=FALSE, dump_format = c("text", "json"), ...) { check.deprecation(...) dump_format <- match.arg(dump_format) - if (class(model) != "xgb.Booster") + if (!inherits(model, "xgb.Booster")) stop("model: argument must be of type xgb.Booster") - if (!(class(fname) %in% c("character", "NULL") && length(fname) <= 1)) - stop("fname: argument must be of type character (when provided)") - if (!(class(fmap) %in% c("character", "NULL") && length(fmap) <= 1)) - stop("fmap: argument must be of type character (when provided)") + if (!(is.null(fname) || is.character(fname))) + stop("fname: argument must be a character string (when provided)") + if (!(is.null(fmap) || is.character(fmap))) + stop("fmap: argument must be a character string (when provided)") model <- xgb.Booster.complete(model) - model_dump <- .Call("XGBoosterDumpModel_R", model$handle, fmap, as.integer(with_stats), + model_dump <- .Call("XGBoosterDumpModel_R", model$handle, NVL(fmap, "")[1], as.integer(with_stats), as.character(dump_format), PACKAGE = "xgboost") if (is.null(fname)) @@ -65,7 +65,7 @@ xgb.dump <- function(model = NULL, fname = NULL, fmap = "", with_stats=FALSE, if (is.null(fname)) { return(model_dump) } else { - writeLines(model_dump, fname) + writeLines(model_dump, fname[1]) return(TRUE) } } diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R index 4fb1f08c4..15de5bfa2 100644 --- a/R-package/R/xgb.importance.R +++ b/R-package/R/xgb.importance.R @@ -58,13 +58,13 @@ xgb.importance <- function(feature_names = NULL, model = NULL, if (!(is.null(data) && is.null(label) && is.null(target))) warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated") - if (class(model) != "xgb.Booster") - stop("Either 'model' has to be an object of class xgb.Booster") + if (!inherits(model, "xgb.Booster")) + stop("model: must be an object of class xgb.Booster") if (is.null(feature_names) && !is.null(model$feature_names)) feature_names <- model$feature_names - if (!class(feature_names) %in% 
c("character", "NULL")) + if (!(is.null(feature_names) || is.character(feature_names))) stop("feature_names: Has to be a character vector") model_text_dump <- xgb.dump(model = model, with_stats = TRUE) @@ -76,6 +76,8 @@ xgb.importance <- function(feature_names = NULL, model = NULL, as.numeric if(is.null(feature_names)) feature_names <- seq(to = length(weights)) + if (length(feature_names) != length(weights)) + stop("feature_names has less elements than there are features used in the model") result <- data.table(Feature = feature_names, Weight = weights)[order(-abs(Weight))] } else { # tree model diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R index 75adf5f95..12706ec55 100644 --- a/R-package/R/xgb.model.dt.tree.R +++ b/R-package/R/xgb.model.dt.tree.R @@ -14,6 +14,8 @@ #' It could be useful, e.g., in multiclass classification to get only #' the trees of one certain class. IMPORTANT: the tree index in xgboost models #' is zero-based (e.g., use \code{trees = 0:4} for first 5 trees). +#' @param use_int_id a logical flag indicating whether nodes in columns "Yes", "No", "Missing" should be +#' represented as integers (when FALSE) or as "Tree-Node" character strings (when FALSE). #' @param ... currently not used. #' #' @return @@ -22,9 +24,9 @@ #' The columns of the \code{data.table} are: #' #' \itemize{ -#' \item \code{Tree}: ID of a tree in a model (integer) -#' \item \code{Node}: integer ID of a node in a tree (integer) -#' \item \code{ID}: identifier of a node in a model (character) +#' \item \code{Tree}: integer ID of a tree in a model (zero-based index) +#' \item \code{Node}: integer ID of a node in a tree (zero-based index) +#' \item \code{ID}: character identifier of a node in a model (only when \code{use_int_id=FALSE}) #' \item \code{Feature}: for a branch node, it's a feature id or name (when available); #' for a leaf note, it simply labels it as \code{'Leaf'} #' \item \code{Split}: location of the split for a branch node (split condition is always "less than") @@ -36,6 +38,10 @@ #' or collected by a leaf during training. #' } #' +#' When \code{use_int_id=FALSE}, columns "Yes", "No", and "Missing" point to model-wide node identifiers +#' in the "ID" column. When \code{use_int_id=TRUE}, those columns point to node identifiers from +#' the corresponding trees in the "Node" column. +#' #' @examples #' # Basic use: #' @@ -45,8 +51,9 @@ #' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") #' #' (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst)) -#' # This bst has feature_names stored in it, so those would be used when -#' # the feature_names parameter is not provided: +#' +#' # This bst model already has feature_names stored with it, so those would be used when +#' # feature_names is not set: #' (dt <- xgb.model.dt.tree(model = bst)) #' #' # How to match feature names of splits that are following a current 'Yes' branch: @@ -55,24 +62,24 @@ #' #' @export xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL, - trees = NULL, ...){ + trees = NULL, use_int_id = FALSE, ...){ check.deprecation(...) 
- if (class(model) != "xgb.Booster" & class(text) != "character") { - stop("Either 'model' has to be an object of class xgb.Booster\n", - " or 'text' has to be a character vector with the result of xgb.dump\n", - " (or NULL if the model was provided).") + if (!inherits(model, "xgb.Booster") & !is.character(text)) { + stop("Either 'model' must be an object of class xgb.Booster\n", + " or 'text' must be a character vector with the result of xgb.dump\n", + " (or NULL if 'model' was provided).") } if (is.null(feature_names) && !is.null(model) && !is.null(model$feature_names)) feature_names <- model$feature_names - if (!class(feature_names) %in% c("character", "NULL")) { - stop("feature_names: Has to be a character vector") + if (!(is.null(feature_names) || is.character(feature_names))) { + stop("feature_names: must be a character vector") } - if (!class(trees) %in% c("integer", "numeric", "NULL")) { - stop("trees: Has to be a vector of integers.") + if (!(is.null(trees) || is.numeric(trees))) { + stop("trees: must be a vector of integers.") } if (is.null(text)){ @@ -86,11 +93,11 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL, position <- which(!is.na(stri_match_first_regex(text, "booster"))) - add.tree.id <- function(x, i) paste(i, x, sep = "-") + add.tree.id <- function(node, tree) if (use_int_id) node else paste(tree, node, sep = "-") anynumber_regex <- "[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?" - td <- data.table(t=text) + td <- data.table(t = text) td[position, Tree := 1L] td[, Tree := cumsum(ifelse(is.na(Tree), 0L, Tree)) - 1L] @@ -102,32 +109,43 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL, td <- td[Tree %in% trees & !grepl('^booster', t)] td[, Node := stri_match_first_regex(t, "(\\d+):")[,2] %>% as.integer ] - td[, ID := add.tree.id(Node, Tree)] + if (!use_int_id) td[, ID := add.tree.id(Node, Tree)] td[, isLeaf := !is.na(stri_match_first_regex(t, "leaf"))] # parse branch lines - td[isLeaf==FALSE, c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover") := { - rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),", - "gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")") - # skip some indices with spurious capture groups from anynumber_regex - xtr <- stri_match_first_regex(t, rx)[, c(2,3,5,6,7,8,10)] - xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree) - lapply(1:ncol(xtr), function(i) xtr[,i]) - }] + branch_rx <- paste0("f(\\d+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),", + "gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")") + branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Quality", "Cover") + td[isLeaf == FALSE, + (branch_cols) := { + # skip some indices with spurious capture groups from anynumber_regex + xtr <- stri_match_first_regex(t, branch_rx)[, c(2,3,5,6,7,8,10), drop = FALSE] + xtr[, 3:5] <- add.tree.id(xtr[, 3:5], Tree) + lapply(1:ncol(xtr), function(i) xtr[,i]) + }] # assign feature_names when available - td[isLeaf==FALSE & !is.null(feature_names), - Feature := feature_names[as.numeric(Feature) + 1] ] + if (!is.null(feature_names)) { + if (length(feature_names) <= max(as.numeric(td$Feature), na.rm = TRUE)) + stop("feature_names has less elements than there are features used in the model") + td[isLeaf == FALSE, Feature := feature_names[as.numeric(Feature) + 1] ] + } # parse leaf lines - td[isLeaf==TRUE, c("Feature", "Quality", "Cover") := { - rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")") - xtr 
<- stri_match_first_regex(t, rx)[, c(2,4)] - c("Leaf", lapply(1:ncol(xtr), function(i) xtr[,i])) - }] + leaf_rx <- paste0("leaf=(", anynumber_regex, "),cover=(", anynumber_regex, ")") + leaf_cols <- c("Feature", "Quality", "Cover") + td[isLeaf == TRUE, + (leaf_cols) := { + xtr <- stri_match_first_regex(t, leaf_rx)[, c(2,4)] + c("Leaf", lapply(1:ncol(xtr), function(i) xtr[,i])) + }] # convert some columns to numeric numeric_cols <- c("Split", "Quality", "Cover") - td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols=numeric_cols] + td[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols] + if (use_int_id) { + int_cols <- c("Yes", "No", "Missing") + td[, (int_cols) := lapply(.SD, as.integer), .SDcols = int_cols] + } td[, t := NULL] td[, isLeaf := NULL] diff --git a/R-package/R/xgb.plot.deepness.R b/R-package/R/xgb.plot.deepness.R index e8fceaba5..bfbf61a89 100644 --- a/R-package/R/xgb.plot.deepness.R +++ b/R-package/R/xgb.plot.deepness.R @@ -63,7 +63,7 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.depth", "med.weight"), plot = TRUE, ...) { - if (!(class(model) == "xgb.Booster" || is.data.table(model))) + if (!(inherits(model, "xgb.Booster") || is.data.table(model))) stop("model: Has to be either an xgb.Booster model generaged by the xgb.train function\n", "or a data.table result of the xgb.importance function") @@ -73,14 +73,14 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d which <- match.arg(which) dt_tree <- model - if (class(model) == "xgb.Booster") + if (inherits(model, "xgb.Booster")) dt_tree <- xgb.model.dt.tree(model = model) if (!all(c("Feature", "Tree", "ID", "Yes", "No", "Cover") %in% colnames(dt_tree))) stop("Model tree columns are not as expected!\n", " Note that this function works only for tree models.") - dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight=Quality)], by = "ID") + dt_depths <- merge(get.leaf.depth(dt_tree), dt_tree[, .(ID, Cover, Weight = Quality)], by = "ID") setkeyv(dt_depths, c("Tree", "ID")) # count by depth levels, and also calculate average cover at a depth dt_summaries <- dt_depths[, .(.N, Cover = mean(Cover)), Depth] @@ -89,13 +89,13 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d if (plot) { if (which == "2x1") { op <- par(no.readonly = TRUE) - par(mfrow=c(2,1), + par(mfrow = c(2,1), oma = c(3,1,3,1) + 0.1, mar = c(1,4,1,0) + 0.1) - dt_summaries[, barplot(N, border=NA, ylab = 'Number of leafs', ...)] + dt_summaries[, barplot(N, border = NA, ylab = 'Number of leafs', ...)] - dt_summaries[, barplot(Cover, border=NA, ylab = "Weighted cover", names.arg=Depth, ...)] + dt_summaries[, barplot(Cover, border = NA, ylab = "Weighted cover", names.arg = Depth, ...)] title("Model complexity", xlab = "Leaf depth", outer = TRUE, line = 1) par(op) @@ -119,8 +119,8 @@ xgb.plot.deepness <- function(model = NULL, which = c("2x1", "max.depth", "med.d get.leaf.depth <- function(dt_tree) { # extract tree graph's edges dt_edges <- rbindlist(list( - dt_tree[Feature != "Leaf", .(ID, To=Yes, Tree)], - dt_tree[Feature != "Leaf", .(ID, To=No, Tree)] + dt_tree[Feature != "Leaf", .(ID, To = Yes, Tree)], + dt_tree[Feature != "Leaf", .(ID, To = No, Tree)] )) # whether "To" is a leaf: dt_edges <- diff --git a/R-package/R/xgb.plot.importance.R b/R-package/R/xgb.plot.importance.R index 9363f1ae4..bff9e41c4 100644 --- a/R-package/R/xgb.plot.importance.R +++ b/R-package/R/xgb.plot.importance.R @@ -61,8 +61,8 @@ xgb.plot.importance <- 
function(importance_matrix = NULL, top_n = NULL, measure = NULL, rel_to_first = FALSE, left_margin = 10, cex = NULL, plot = TRUE, ...) { check.deprecation(...) - if (!"data.table" %in% class(importance_matrix)) { - stop("importance_matrix: Should be a data.table.") + if (!is.data.table(importance_matrix)) { + stop("importance_matrix: must be a data.table") } imp_names <- colnames(importance_matrix) @@ -107,12 +107,12 @@ xgb.plot.importance <- function(importance_matrix = NULL, top_n = NULL, measure # reverse the order of rows to have the highest ranked at the top importance_matrix[nrow(importance_matrix):1, - barplot(Importance, horiz=TRUE, border=NA, cex.names=cex, - names.arg=Feature, las=1, ...)] + barplot(Importance, horiz = TRUE, border = NA, cex.names = cex, + names.arg = Feature, las = 1, ...)] grid(NULL, NA) # redraw over the grid importance_matrix[nrow(importance_matrix):1, - barplot(Importance, horiz=TRUE, border=NA, add=TRUE)] + barplot(Importance, horiz = TRUE, border = NA, add = TRUE)] par(op) } diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R index 313cc9d6f..b5ed3445c 100644 --- a/R-package/R/xgb.plot.tree.R +++ b/R-package/R/xgb.plot.tree.R @@ -11,7 +11,7 @@ #' @param plot_width the width of the diagram in pixels. #' @param plot_height the height of the diagram in pixels. #' @param render a logical flag for whether the graph should be rendered (see Value). -#' @param show_node_id a logical flag for whether to include node id's in the graph. +#' @param show_node_id a logical flag for whether to show node id's in the graph. #' @param ... currently not used. #' #' @details @@ -53,17 +53,25 @@ #' bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3, #' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") #' # plot all the trees -#' xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst) -#' # plot only the first tree and include the node ID: -#' xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst, -#' trees = 0, show_node_id = TRUE) +#' xgb.plot.tree(model = bst) +#' # plot only the first tree and display the node ID: +#' xgb.plot.tree(model = bst, trees = 0, show_node_id = TRUE) +#' +#' \dontrun{ +#' # Below is an example of how to save this plot to a file. +#' # Note that for `export_graph` to work, the DiagrammeRsvg and rsvg packages must also be installed. +#' library(DiagrammeR) +#' gr <- xgb.plot.tree(model=bst, trees=0:1, render=FALSE) +#' export_graph(gr, 'tree.pdf', width=1500, height=1900) +#' export_graph(gr, 'tree.png', width=1500, height=1900) +#' } #' #' @export xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL, render = TRUE, show_node_id = FALSE, ...){ check.deprecation(...) - if (class(model) != "xgb.Booster") { - stop("model: Has to be an object of class xgb.Booster model generaged by the xgb.train function.") + if (!inherits(model, "xgb.Booster")) { + stop("model: Has to be an object of class xgb.Booster") } if (!requireNamespace("DiagrammeR", quietly = TRUE)) { diff --git a/R-package/R/xgb.save.R b/R-package/R/xgb.save.R index 8162f0fa2..5d95ee664 100644 --- a/R-package/R/xgb.save.R +++ b/R-package/R/xgb.save.R @@ -32,10 +32,11 @@ xgb.save <- function(model, fname) { if (typeof(fname) != "character") stop("fname must be character") - if (class(model) != "xgb.Booster") - stop("the input must be xgb.Booster. 
Use xgb.DMatrix.save to save xgb.DMatrix object.") - + if (!inherits(model, "xgb.Booster")) { + stop("model must be xgb.Booster.", + if (inherits(model, "xgb.DMatrix")) " Use xgb.DMatrix.save to save an xgb.DMatrix object." else "") + } model <- xgb.Booster.complete(model, saveraw = FALSE) - .Call("XGBoosterSaveModel_R", model$handle, fname, PACKAGE = "xgboost") + .Call("XGBoosterSaveModel_R", model$handle, fname[1], PACKAGE = "xgboost") return(TRUE) } diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index a9eb863ad..7712e8775 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -233,7 +233,7 @@ #' @rdname xgb.train #' @export xgb.train <- function(params = list(), data, nrounds, watchlist = list(), - obj = NULL, feval = NULL, verbose = 1, print_every_n=1L, + obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L, early_stopping_rounds = NULL, maximize = NULL, save_period = NULL, save_name = "xgboost.model", xgb_model = NULL, callbacks = list(), ...) { @@ -247,11 +247,11 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), # data & watchlist checks dtrain <- data - if (class(dtrain) != "xgb.DMatrix") + if (!inherits(dtrain, "xgb.DMatrix")) stop("second argument dtrain must be xgb.DMatrix") if (length(watchlist) > 0) { if (typeof(watchlist) != "list" || - !all(sapply(watchlist, class) == "xgb.DMatrix")) + !all(sapply(watchlist, inherits, 'xgb.DMatrix'))) stop("watchlist must be a list of xgb.DMatrix elements") evnames <- names(watchlist) if (is.null(evnames) || any(evnames == "")) @@ -281,7 +281,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), if (!is.null(early_stopping_rounds) && !has.callbacks(callbacks, 'cb.early.stop')) { callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds, - maximize=maximize, verbose=verbose)) + maximize = maximize, verbose = verbose)) } # Sort the callbacks into categories cb <- categorize.callbacks(callbacks) @@ -332,7 +332,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), if (stop_condition) break } - for (f in cb$finalize) f(finalize=TRUE) + for (f in cb$finalize) f(finalize = TRUE) bst <- xgb.Booster.complete(bst, saveraw = TRUE) @@ -343,7 +343,7 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(), if (length(evaluation_log) > 0 && nrow(evaluation_log) > 0) { # include the previous compatible history when available - if (class(xgb_model) == 'xgb.Booster' && + if (inherits(xgb_model, 'xgb.Booster') && !is_update && !is.null(xgb_model$evaluation_log) && all.equal(colnames(evaluation_log), diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R index f2ce90b12..11a96c269 100644 --- a/R-package/R/xgboost.R +++ b/R-package/R/xgboost.R @@ -1,4 +1,4 @@ -# Simple interface for training an xgboost model that wraps \code{xgb.train} +# Simple interface for training an xgboost model that wraps \code{xgb.train}. # Its documentation is combined with xgb.train. # #' @rdname xgb.train @@ -7,7 +7,7 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL, params = list(), nrounds, verbose = 1, print_every_n = 1L, early_stopping_rounds = NULL, maximize = NULL, - save_period = 0, save_name = "xgboost.model", + save_period = NULL, save_name = "xgboost.model", xgb_model = NULL, callbacks = list(), ...) 
{ dtrain <- xgb.get.DMatrix(data, label, missing, weight) diff --git a/R-package/README.md b/R-package/README.md index b66006fbd..afc414ef1 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -19,49 +19,6 @@ We are [on CRAN](https://cran.r-project.org/web/packages/xgboost/index.html) now install.packages('xgboost') ``` -You can also install from our weekly updated drat repo: -```r -install.packages("xgboost", repos=c("http://dmlc.ml/drat/", getOption("repos")), type="source") -``` - -***Important*** Due to the usage of submodule, `install_github` is no longer support to install the -latest version of R package. -For up-to-date version, please install from github. - -Windows users will need to install [RTools](https://cran.r-project.org/bin/windows/Rtools/) first. They also need to download [MinGW-W64](http://iweb.dl.sourceforge.net/project/mingw-w64/Toolchains%20targetting%20Win32/Personal%20Builds/mingw-builds/installer/mingw-w64-install.exe) using x86_64 architecture during installation. - -Run the following command to add MinGW to PATH in Windows if not already added. - -```cmd -PATH %PATH%;C:\Program Files\mingw-w64\x86_64-5.3.0-posix-seh-rt_v4-rev0\mingw64\bin -``` - -To compile xgboost at the root of your storage, run the following bash script. - -```bash -git clone --recursive https://github.com/dmlc/xgboost -cd xgboost -git submodule init -git submodule update -alias make='mingw32-make' -cd dmlc-core -make -j4 -cd ../rabit -make lib/librabit_empty.a -j4 -cd .. -cp make/mingw64.mk config.mk -make -j4 -``` - -Run the following R script to install xgboost package from the root directory. - -```r -install.packages('devtools') # if not installed -setwd('C:/xgboost/') -library(devtools) -install('R-package') -``` - For more detailed installation instructions, please see [here](http://xgboost.readthedocs.org/en/latest/build.html#r-package-installation). Examples @@ -69,3 +26,8 @@ Examples * Please visit [walk through example](demo). * See also the [example scripts](../demo/kaggle-higgs) for Kaggle Higgs Challenge, including [speedtest script](../demo/kaggle-higgs/speedtest.R) on this dataset and the one related to [Otto challenge](../demo/kaggle-otto), including a [RMarkdown documentation](../demo/kaggle-otto/understandingXGBoostModel.Rmd). + +Development +----------- + +* See the [R Package section](https://xgboost.readthedocs.io/en/latest/how_to/contribute.html#r-package) of the contributiors guide. diff --git a/R-package/man/xgb.Booster.complete.Rd b/R-package/man/xgb.Booster.complete.Rd index 725c11bd5..44b3c039c 100644 --- a/R-package/man/xgb.Booster.complete.Rd +++ b/R-package/man/xgb.Booster.complete.Rd @@ -28,8 +28,8 @@ E.g., when an \code{xgb.Booster} model is saved as an R object and then is loade its handle (pointer) to an internal xgboost model would be invalid. The majority of xgboost methods should still work for such a model object since those methods would be using \code{xgb.Booster.complete} internally. However, one might find it to be more efficient to call the -\code{xgb.Booster.complete} function once after loading a model as an R-object. That which would -prevent further reconstruction (potentially, multiple times) of an internal booster model. +\code{xgb.Booster.complete} function explicitely once after loading a model as an R-object. +That would prevent further repeated implicit reconstruction of an internal booster model. 
} \examples{ @@ -41,6 +41,7 @@ saveRDS(bst, "xgb.model.rds") bst1 <- readRDS("xgb.model.rds") # the handle is invalid: print(bst1$handle) + bst1 <- xgb.Booster.complete(bst1) # now the handle points to a valid internal booster model: print(bst1$handle) diff --git a/R-package/man/xgb.DMatrix.Rd b/R-package/man/xgb.DMatrix.Rd index 7f38c01ed..6e40a2b00 100644 --- a/R-package/man/xgb.DMatrix.Rd +++ b/R-package/man/xgb.DMatrix.Rd @@ -2,23 +2,28 @@ % Please edit documentation in R/xgb.DMatrix.R \name{xgb.DMatrix} \alias{xgb.DMatrix} -\title{Contruct xgb.DMatrix object} +\title{Construct xgb.DMatrix object} \usage{ -xgb.DMatrix(data, info = list(), missing = NA, ...) +xgb.DMatrix(data, info = list(), missing = NA, silent = FALSE, ...) } \arguments{ -\item{data}{a \code{matrix} object, a \code{dgCMatrix} object or a character representing a filename} +\item{data}{a \code{matrix} object (either numeric or integer), a \code{dgCMatrix} object, or a character +string representing a filename.} -\item{info}{a list of information of the xgb.DMatrix object} +\item{info}{a named list of additional information to store in the \code{xgb.DMatrix} object. +See \code{\link{setinfo}} for the specific allowed kinds of} -\item{missing}{Missing is only used when input is dense matrix, pick a float -value that represents missing value. Sometime a data use 0 or other extreme value to represents missing values.} +\item{missing}{a float value to represents missing values in data (used only when input is a dense matrix). +It is useful when a 0 or some other extreme value represents missing values in data.} -\item{...}{other information to pass to \code{info}.} +\item{silent}{whether to suppress printing an informational message after loading from a file.} + +\item{...}{the \code{info} data could be passed directly as parameters, without creating an \code{info} list.} } \description{ -Contruct xgb.DMatrix object from dense matrix, sparse matrix -or local file (that was created previously by saving an \code{xgb.DMatrix}). +Construct xgb.DMatrix object from either a dense matrix, a sparse matrix, or a local file. +Supported input file formats are either a libsvm text file or a binary file that was created previously by +\code{\link{xgb.DMatrix.save}}). } \examples{ data(agaricus.train, package='xgboost') diff --git a/R-package/man/xgb.dump.Rd b/R-package/man/xgb.dump.Rd index 411c456b3..922574464 100644 --- a/R-package/man/xgb.dump.Rd +++ b/R-package/man/xgb.dump.Rd @@ -4,7 +4,7 @@ \alias{xgb.dump} \title{Dump an xgboost model in text format.} \usage{ -xgb.dump(model = NULL, fname = NULL, fmap = "", with_stats = FALSE, +xgb.dump(model, fname = NULL, fmap = "", with_stats = FALSE, dump_format = c("text", "json"), ...) } \arguments{ diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd index 59dad6190..a5acc4226 100644 --- a/R-package/man/xgb.model.dt.tree.Rd +++ b/R-package/man/xgb.model.dt.tree.Rd @@ -5,7 +5,7 @@ \title{Parse a boosted tree model text dump} \usage{ xgb.model.dt.tree(feature_names = NULL, model = NULL, text = NULL, - trees = NULL, ...) + trees = NULL, use_int_id = FALSE, ...) } \arguments{ \item{feature_names}{character vector of feature names. If the model already @@ -24,6 +24,9 @@ It could be useful, e.g., in multiclass classification to get only the trees of one certain class. 
IMPORTANT: the tree index in xgboost models is zero-based (e.g., use \code{trees = 0:4} for first 5 trees).} +\item{use_int_id}{a logical flag indicating whether nodes in columns "Yes", "No", "Missing" should be +represented as integers (when FALSE) or as "Tree-Node" character strings (when FALSE).} + \item{...}{currently not used.} } \value{ @@ -32,9 +35,9 @@ A \code{data.table} with detailed information about model trees' nodes. The columns of the \code{data.table} are: \itemize{ - \item \code{Tree}: ID of a tree in a model (integer) - \item \code{Node}: integer ID of a node in a tree (integer) - \item \code{ID}: identifier of a node in a model (character) + \item \code{Tree}: integer ID of a tree in a model (zero-based index) + \item \code{Node}: integer ID of a node in a tree (zero-based index) + \item \code{ID}: character identifier of a node in a model (only when \code{use_int_id=FALSE}) \item \code{Feature}: for a branch node, it's a feature id or name (when available); for a leaf note, it simply labels it as \code{'Leaf'} \item \code{Split}: location of the split for a branch node (split condition is always "less than") @@ -44,7 +47,11 @@ The columns of the \code{data.table} are: \item \code{Quality}: either the split gain (change in loss) or the leaf value \item \code{Cover}: metric related to the number of observation either seen by a split or collected by a leaf during training. -} +} + +When \code{use_int_id=FALSE}, columns "Yes", "No", and "Missing" point to model-wide node identifiers +in the "ID" column. When \code{use_int_id=TRUE}, those columns point to node identifiers from +the corresponding trees in the "Node" column. } \description{ Parse a boosted tree model text dump into a \code{data.table} structure. @@ -58,8 +65,9 @@ bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_dep eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") (dt <- xgb.model.dt.tree(colnames(agaricus.train$data), bst)) -# This bst has feature_names stored in it, so those would be used when -# the feature_names parameter is not provided: + +# This bst model already has feature_names stored with it, so those would be used when +# feature_names is not set: (dt <- xgb.model.dt.tree(model = bst)) # How to match feature names of splits that are following a current 'Yes' branch: diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd index f32a8b7e0..15685a157 100644 --- a/R-package/man/xgb.plot.tree.Rd +++ b/R-package/man/xgb.plot.tree.Rd @@ -24,7 +24,7 @@ IMPORTANT: the tree index in xgboost model is zero-based \item{render}{a logical flag for whether the graph should be rendered (see Value).} -\item{show_node_id}{a logical flag for whether to include node id's in the graph.} +\item{show_node_id}{a logical flag for whether to show node id's in the graph.} \item{...}{currently not used.} } @@ -68,9 +68,17 @@ data(agaricus.train, package='xgboost') bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label, max_depth = 3, eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") # plot all the trees -xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst) -# plot only the first tree and include the node ID: -xgb.plot.tree(feature_names = colnames(agaricus.train$data), model = bst, - trees = 0, show_node_id = TRUE) +xgb.plot.tree(model = bst) +# plot only the first tree and display the node ID: +xgb.plot.tree(model = bst, trees = 0, show_node_id = TRUE) + +\dontrun{ +# Below is an example of how to save this 
plot to a file. +# Note that for `export_graph` to work, the DiagrammeRsvg and rsvg packages must also be installed. +library(DiagrammeR) +gr <- xgb.plot.tree(model=bst, trees=0:1, render=FALSE) +export_graph(gr, 'tree.pdf', width=1500, height=1900) +export_graph(gr, 'tree.png', width=1500, height=1900) +} } diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index c05a8fe7c..37a5db995 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -12,7 +12,7 @@ xgb.train(params = list(), data, nrounds, watchlist = list(), obj = NULL, xgboost(data = NULL, label = NULL, missing = NA, weight = NULL, params = list(), nrounds, verbose = 1, print_every_n = 1L, - early_stopping_rounds = NULL, maximize = NULL, save_period = 0, + early_stopping_rounds = NULL, maximize = NULL, save_period = NULL, save_name = "xgboost.model", xgb_model = NULL, callbacks = list(), ...) } \arguments{ diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index 3dfafdd00..7bf3bc0a8 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -68,12 +68,19 @@ SEXP XGDMatrixCreateFromMat_R(SEXP mat, SEXP dim = getAttrib(mat, R_DimSymbol); size_t nrow = static_cast<size_t>(INTEGER(dim)[0]); size_t ncol = static_cast<size_t>(INTEGER(dim)[1]); - double *din = REAL(mat); + const bool is_int = TYPEOF(mat) == INTSXP; + double *din; + int *iin; + if (is_int) { + iin = INTEGER(mat); + } else { + din = REAL(mat); + } std::vector<float> data(nrow * ncol); #pragma omp parallel for schedule(static) for (omp_ulong i = 0; i < nrow; ++i) { for (size_t j = 0; j < ncol; ++j) { - data[i * ncol +j] = din[i + nrow * j]; + data[i * ncol +j] = is_int ? static_cast<float>(iin[i + nrow * j]) : din[i + nrow * j]; } } DMatrixHandle handle; diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 951f46217..3b11a0614 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -189,3 +189,36 @@ test_that("xgb.cv works", { expect_false(is.null(cv$callbacks)) expect_false(is.null(cv$call)) }) + +test_that("train and predict with non-strict classes", { + # standard dense matrix input + train_dense <- as.matrix(train$data) + bst <- xgboost(data = train_dense, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0) + pr0 <- predict(bst, train_dense) + + # dense matrix-like input of non-matrix class + class(train_dense) <- 'shmatrix' + expect_true(is.matrix(train_dense)) + expect_error( + bst <- xgboost(data = train_dense, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0) + , regexp = NA) + expect_error(pr <- predict(bst, train_dense), regexp = NA) + expect_equal(pr0, pr) + + # dense matrix-like input of non-matrix class with some inheritance + class(train_dense) <- c('pphmatrix','shmatrix') + expect_true(is.matrix(train_dense)) + expect_error( + bst <- xgboost(data = train_dense, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0) + , regexp = NA) + expect_error(pr <- predict(bst, train_dense), regexp = NA) + expect_equal(pr0, pr) + + # when someone inherits from xgb.Booster, it should still be possible to use it as xgb.Booster + class(bst) <- c('super.Booster', 'xgb.Booster') + expect_error(pr <- predict(bst, train_dense), regexp = NA) + expect_equal(pr0, pr) +}) diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R
index 965e3f480..efc0ca0d6 100644 --- a/R-package/tests/testthat/test_dmatrix.R +++ b/R-package/tests/testthat/test_dmatrix.R @@ -7,18 +7,30 @@ data(agaricus.test, package='xgboost') test_data <- agaricus.test$data[1:100,] test_label <- agaricus.test$label[1:100] -test_that("xgb.DMatrix: basic construction, saving, loading", { +test_that("xgb.DMatrix: basic construction", { # from sparse matrix dtest1 <- xgb.DMatrix(test_data, label=test_label) + # from dense matrix dtest2 <- xgb.DMatrix(as.matrix(test_data), label=test_label) expect_equal(getinfo(dtest1, 'label'), getinfo(dtest2, 'label')) + expect_equal(dim(dtest1), dim(dtest2)) + #from dense integer matrix + int_data <- as.matrix(test_data) + storage.mode(int_data) <- "integer" + dtest3 <- xgb.DMatrix(int_data, label=test_label) + expect_equal(dim(dtest1), dim(dtest3)) +}) + +test_that("xgb.DMatrix: saving, loading", { # save to a local file + dtest1 <- xgb.DMatrix(test_data, label=test_label) tmp_file <- tempfile('xgb.DMatrix_') expect_true(xgb.DMatrix.save(dtest1, tmp_file)) # read from a local file - dtest3 <- xgb.DMatrix(tmp_file) + expect_output(dtest3 <- xgb.DMatrix(tmp_file), "entries loaded from") + expect_output(dtest3 <- xgb.DMatrix(tmp_file, silent = TRUE), NA) unlink(tmp_file) expect_equal(getinfo(dtest1, 'label'), getinfo(dtest3, 'label')) @@ -26,7 +38,7 @@ test_that("xgb.DMatrix: basic construction, saving, loading", { tmp <- c("0 1:1 2:1","1 3:1","0 1:1") tmp_file <- 'tmp.libsvm' writeLines(tmp, tmp_file) - dtest4 <- xgb.DMatrix(tmp_file) + dtest4 <- xgb.DMatrix(tmp_file, silent = TRUE) expect_equal(dim(dtest4), c(3, 4)) expect_equal(getinfo(dtest4, 'label'), c(0,1,0)) unlink(tmp_file) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index b12d98d29..84df814c8 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -133,6 +133,12 @@ test_that("xgb.model.dt.tree works with and without feature names", { dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.x) expect_output(str(dt.tree.x), 'Feature.*\\"3\\"') expect_equal(dt.tree[, -4, with=FALSE], dt.tree.x[, -4, with=FALSE]) + + # using integer node ID instead of character + dt.tree.int <- xgb.model.dt.tree(model = bst.Tree, use_int_id = TRUE) + expect_equal(as.integer(tstrsplit(dt.tree$Yes, '-')[[2]]), dt.tree.int$Yes) + expect_equal(as.integer(tstrsplit(dt.tree$No, '-')[[2]]), dt.tree.int$No) + expect_equal(as.integer(tstrsplit(dt.tree$Missing, '-')[[2]]), dt.tree.int$Missing) }) test_that("xgb.model.dt.tree throws error for gblinear", { @@ -169,6 +175,17 @@ test_that("xgb.importance works with GLM model", { xgb.ggplot.importance(importance.GLM) }) +test_that("xgb.model.dt.tree and xgb.importance work with a single split model", { + bst1 <- xgboost(data = sparse_matrix, label = label, max_depth = 1, + eta = 1, nthread = 2, nrounds = 1, verbose = 0, + objective = "binary:logistic") + expect_error(dt <- xgb.model.dt.tree(model = bst1), regexp = NA) # no error + expect_equal(nrow(dt), 3) + expect_error(imp <- xgb.importance(model = bst1), regexp = NA) # no error + expect_equal(nrow(imp), 1) + expect_equal(imp$Gain, 1) +}) + test_that("xgb.plot.tree works with and without feature names", { xgb.plot.tree(feature_names = feature.names, model = bst.Tree) xgb.plot.tree(model = bst.Tree) diff --git a/doc/how_to/contribute.md b/doc/how_to/contribute.md index c056313f0..bad0455ad 100644 --- a/doc/how_to/contribute.md +++ b/doc/how_to/contribute.md @@ -120,6 +120,7 @@ R Package make rcpplint ``` - 
When needed, you can disable the linter warning of certain line with ```// NOLINT(*)``` comments. +- We use [roxygen](https://cran.r-project.org/web/packages/roxygen2/vignettes/roxygen2.html) for documenting the R package. ### Rmarkdown Vignettes Rmarkdown vignettes are placed in [R-package/vignettes](../R-package/vignettes) @@ -143,3 +144,17 @@ make the-markdown-to-make.md make html ``` The reason we do this is to avoid exploded repo size due to generated images sizes. + +### R package versioning +Since version 0.6.4.3, we have adopted a versioning system that uses an ```x.y.z``` (or ```core_major.core_minor.cran_release```) +format for CRAN releases and an ```x.y.z.p``` (or ```core_major.core_minor.cran_release.patch```) format for development patch versions. +This approach is similar to the one described in Yihui Xie's +[blog post on R Package Versioning](https://yihui.name/en/2013/06/r-package-versioning/), +except we need an additional field to accommodate the ```x.y``` core library version. + +Each new CRAN release bumps up the 3rd field, while development in between CRAN releases +is marked by an additional 4th field on top of the existing CRAN release version. +Some additional consideration is needed when the core library version changes. +E.g., after the core changes from 0.6 to 0.7, the R package development version would become 0.7.0.1, working towards +a 0.7.1 CRAN release. Version 0.7.0 itself would not be released to CRAN unless it required almost no additional development.
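
Not part of the patch itself — a minimal usage sketch of the `xgb.DMatrix()` changes introduced above (integer dense-matrix input and the new `silent` argument). It assumes the patched package is installed and uses the bundled agaricus data; object names are illustrative only.

```r
library(xgboost)
data(agaricus.train, package = 'xgboost')

# Integer dense matrices are now accepted directly; their values are cast to float
# inside XGDMatrixCreateFromMat_R instead of having to be coerced to double in R first.
int_mat <- as.matrix(agaricus.train$data)
storage.mode(int_mat) <- "integer"
dtrain <- xgb.DMatrix(int_mat, label = agaricus.train$label)

# silent = TRUE suppresses the "... entries loaded from ..." message
# that is otherwise printed when constructing from a file.
tmp_file <- tempfile("xgb.DMatrix_")
xgb.DMatrix.save(dtrain, tmp_file)
dtrain2 <- xgb.DMatrix(tmp_file, silent = TRUE)
unlink(tmp_file)
```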
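
Similarly, a small sketch contrasting the two `use_int_id` modes of `xgb.model.dt.tree()` documented above; the model settings are arbitrary, the `yes_children` helper is illustrative only, and data.table is assumed to be attached for the merge.

```r
library(xgboost)
library(data.table)
data(agaricus.train, package = 'xgboost')

bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
               objective = "binary:logistic", verbose = 0)

# Default: Yes/No/Missing contain model-wide "Tree-Node" strings matching the ID column.
dt_chr <- xgb.model.dt.tree(model = bst)

# use_int_id = TRUE: Yes/No/Missing contain integer Node indices within each tree,
# so linking a split to its 'Yes' child becomes a simple merge on (Tree, Node).
dt_int <- xgb.model.dt.tree(model = bst, use_int_id = TRUE)
yes_children <- merge(dt_int, dt_int[, .(Tree, Node, Yes.Feature = Feature)],
                      by.x = c("Tree", "Yes"), by.y = c("Tree", "Node"))
```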