merge latest changes
This commit is contained in:
@@ -65,6 +65,6 @@ Imports:
|
||||
data.table (>= 1.9.6),
|
||||
jsonlite (>= 1.0)
|
||||
Roxygen: list(markdown = TRUE)
|
||||
RoxygenNote: 7.2.3
|
||||
RoxygenNote: 7.3.0
|
||||
Encoding: UTF-8
|
||||
SystemRequirements: GNU make, C++17
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# Generated by roxygen2: do not edit by hand
|
||||
|
||||
S3method("[",xgb.Booster)
|
||||
S3method("[",xgb.DMatrix)
|
||||
S3method("dimnames<-",xgb.DMatrix)
|
||||
S3method(coef,xgb.Booster)
|
||||
@@ -7,6 +8,7 @@ S3method(dim,xgb.DMatrix)
|
||||
S3method(dimnames,xgb.DMatrix)
|
||||
S3method(getinfo,xgb.Booster)
|
||||
S3method(getinfo,xgb.DMatrix)
|
||||
S3method(length,xgb.Booster)
|
||||
S3method(predict,xgb.Booster)
|
||||
S3method(print,xgb.Booster)
|
||||
S3method(print,xgb.DMatrix)
|
||||
@@ -62,6 +64,7 @@ export(xgb.plot.tree)
|
||||
export(xgb.save)
|
||||
export(xgb.save.raw)
|
||||
export(xgb.set.config)
|
||||
export(xgb.slice.Booster)
|
||||
export(xgb.train)
|
||||
export(xgboost)
|
||||
import(methods)
|
||||
|
||||
@@ -280,7 +280,6 @@ cb.reset.parameters <- function(new_params) {
|
||||
#' \code{iteration},
|
||||
#' \code{begin_iteration},
|
||||
#' \code{end_iteration},
|
||||
#' \code{num_parallel_tree}.
|
||||
#'
|
||||
#' @seealso
|
||||
#' \code{\link{callbacks}},
|
||||
@@ -291,7 +290,6 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||
metric_name = NULL, verbose = TRUE) {
|
||||
# state variables
|
||||
best_iteration <- -1
|
||||
best_ntreelimit <- -1
|
||||
best_score <- Inf
|
||||
best_msg <- NULL
|
||||
metric_idx <- 1
|
||||
@@ -358,12 +356,10 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||
# If the difference is due to floating-point truncation, update best_score
|
||||
best_score <- attr_best_score
|
||||
}
|
||||
xgb.attr(env$bst, "best_iteration") <- best_iteration
|
||||
xgb.attr(env$bst, "best_ntreelimit") <- best_ntreelimit
|
||||
xgb.attr(env$bst, "best_iteration") <- best_iteration - 1
|
||||
xgb.attr(env$bst, "best_score") <- best_score
|
||||
} else {
|
||||
env$basket$best_iteration <- best_iteration
|
||||
env$basket$best_ntreelimit <- best_ntreelimit
|
||||
}
|
||||
}
|
||||
|
||||
@@ -385,14 +381,13 @@ cb.early.stop <- function(stopping_rounds, maximize = FALSE,
|
||||
)
|
||||
best_score <<- score
|
||||
best_iteration <<- i
|
||||
best_ntreelimit <<- best_iteration * env$num_parallel_tree
|
||||
# save the property to attributes, so they will occur in checkpoint
|
||||
if (!is.null(env$bst)) {
|
||||
xgb.attributes(env$bst) <- list(
|
||||
best_iteration = best_iteration - 1, # convert to 0-based index
|
||||
best_score = best_score,
|
||||
best_msg = best_msg,
|
||||
best_ntreelimit = best_ntreelimit)
|
||||
best_msg = best_msg
|
||||
)
|
||||
}
|
||||
} else if (i - best_iteration >= stopping_rounds) {
|
||||
env$stop_condition <- TRUE
|
||||
@@ -475,8 +470,6 @@ cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
|
||||
#' \code{data},
|
||||
#' \code{end_iteration},
|
||||
#' \code{params},
|
||||
#' \code{num_parallel_tree},
|
||||
#' \code{num_class}.
|
||||
#'
|
||||
#' @return
|
||||
#' Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
|
||||
@@ -499,19 +492,21 @@ cb.cv.predict <- function(save_models = FALSE) {
|
||||
stop("'cb.cv.predict' callback requires 'basket' and 'bst_folds' lists in its calling frame")
|
||||
|
||||
N <- nrow(env$data)
|
||||
pred <-
|
||||
if (env$num_class > 1) {
|
||||
matrix(NA_real_, N, env$num_class)
|
||||
} else {
|
||||
rep(NA_real_, N)
|
||||
}
|
||||
pred <- NULL
|
||||
|
||||
iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration) + 1)
|
||||
iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration))
|
||||
if (NVL(env$params[['booster']], '') == 'gblinear') {
|
||||
iterationrange <- c(1, 1) # must be 0 for gblinear
|
||||
iterationrange <- "all"
|
||||
}
|
||||
for (fd in env$bst_folds) {
|
||||
pr <- predict(fd$bst, fd$watchlist[[2]], iterationrange = iterationrange, reshape = TRUE)
|
||||
if (is.null(pred)) {
|
||||
if (NCOL(pr) > 1L) {
|
||||
pred <- matrix(NA_real_, N, ncol(pr))
|
||||
} else {
|
||||
pred <- matrix(NA_real_, N)
|
||||
}
|
||||
}
|
||||
if (is.matrix(pred)) {
|
||||
pred[fd$index, ] <- pr
|
||||
} else {
|
||||
|
||||
@@ -208,7 +208,7 @@ xgb.iter.eval <- function(bst, watchlist, iter, feval) {
|
||||
res <- sapply(seq_along(watchlist), function(j) {
|
||||
w <- watchlist[[j]]
|
||||
## predict using all trees
|
||||
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = c(1, 1))
|
||||
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
|
||||
eval_res <- feval(preds, w)
|
||||
out <- eval_res$value
|
||||
names(out) <- paste0(evnames[j], "-", eval_res$metric)
|
||||
|
||||
@@ -89,7 +89,6 @@ xgb.get.handle <- function(object) {
|
||||
#' @param outputmargin Whether the prediction should be returned in the form of original untransformed
|
||||
#' sum of predictions from boosting iterations' results. E.g., setting `outputmargin=TRUE` for
|
||||
#' logistic regression would return log-odds instead of probabilities.
|
||||
#' @param ntreelimit Deprecated, use `iterationrange` instead.
|
||||
#' @param predleaf Whether to predict pre-tree leaf indices.
|
||||
#' @param predcontrib Whether to return feature contributions to individual predictions (see Details).
|
||||
#' @param approxcontrib Whether to use a fast approximation for feature contributions (see Details).
|
||||
@@ -99,11 +98,17 @@ xgb.get.handle <- function(object) {
|
||||
#' or `predinteraction` is `TRUE`.
|
||||
#' @param training Whether the predictions are used for training. For dart booster,
|
||||
#' training predicting will perform dropout.
|
||||
#' @param iterationrange Specifies which trees are used in prediction. For
|
||||
#' example, take a random forest with 100 rounds.
|
||||
#' With `iterationrange=c(1, 21)`, only the trees built during `[1, 21)` (half open set)
|
||||
#' rounds are used in this prediction. The index is 1-based just like an R vector. When set
|
||||
#' to `c(1, 1)`, XGBoost will use all trees.
|
||||
#' @param iterationrange Sequence of rounds/iterations from the model to use for prediction, specified by passing
|
||||
#' a two-dimensional vector with the start and end numbers in the sequence (same format as R's `seq` - i.e.
|
||||
#' base-1 indexing, and inclusive of both ends).
|
||||
#'
|
||||
#' For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will
|
||||
#' predict using only the first one.
|
||||
#'
|
||||
#' If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all
|
||||
#' of the iterations (rounds) otherwise.
|
||||
#'
|
||||
#' If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
|
||||
#' @param strict_shape Default is `FALSE`. When set to `TRUE`, the output
|
||||
#' type and shape of predictions are invariant to the model type.
|
||||
#' @param ... Not used.
|
||||
@@ -189,7 +194,7 @@ xgb.get.handle <- function(object) {
|
||||
#' # use all trees by default
|
||||
#' pred <- predict(bst, test$data)
|
||||
#' # use only the 1st tree
|
||||
#' pred1 <- predict(bst, test$data, iterationrange = c(1, 2))
|
||||
#' pred1 <- predict(bst, test$data, iterationrange = c(1, 1))
|
||||
#'
|
||||
#' # Predicting tree leafs:
|
||||
#' # the result is an nsamples X ntrees matrix
|
||||
@@ -260,11 +265,11 @@ xgb.get.handle <- function(object) {
|
||||
#' all.equal(pred, pred_labels)
|
||||
#' # prediction from using only 5 iterations should result
|
||||
#' # in the same error as seen in iteration 5:
|
||||
#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
|
||||
#' pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))
|
||||
#' sum(pred5 != lb) / length(lb)
|
||||
#'
|
||||
#' @export
|
||||
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
|
||||
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE,
|
||||
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
|
||||
reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) {
|
||||
if (!inherits(newdata, "xgb.DMatrix")) {
|
||||
@@ -275,25 +280,21 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
)
|
||||
}
|
||||
|
||||
if (NVL(xgb.booster_type(object), '') == 'gblinear' || is.null(ntreelimit))
|
||||
ntreelimit <- 0
|
||||
|
||||
if (ntreelimit != 0 && is.null(iterationrange)) {
|
||||
## only ntreelimit, initialize iteration range
|
||||
iterationrange <- c(0, 0)
|
||||
} else if (ntreelimit == 0 && !is.null(iterationrange)) {
|
||||
## only iteration range, handle 1-based indexing
|
||||
iterationrange <- c(iterationrange[1] - 1, iterationrange[2] - 1)
|
||||
} else if (ntreelimit != 0 && !is.null(iterationrange)) {
|
||||
## both are specified, let libgxgboost throw an error
|
||||
if (!is.null(iterationrange)) {
|
||||
if (is.character(iterationrange)) {
|
||||
stopifnot(iterationrange == "all")
|
||||
iterationrange <- c(0, 0)
|
||||
} else {
|
||||
iterationrange[1] <- iterationrange[1] - 1 # base-0 indexing
|
||||
}
|
||||
} else {
|
||||
## no limit is supplied, use best
|
||||
best_iteration <- xgb.best_iteration(object)
|
||||
if (is.null(best_iteration)) {
|
||||
iterationrange <- c(0, 0)
|
||||
} else {
|
||||
## We don't need to + 1 as R is 1-based index.
|
||||
iterationrange <- c(0, as.integer(best_iteration))
|
||||
iterationrange <- c(0, as.integer(best_iteration) + 1L)
|
||||
}
|
||||
}
|
||||
## Handle the 0 length values.
|
||||
@@ -312,7 +313,6 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
strict_shape = box(TRUE),
|
||||
iteration_begin = box(as.integer(iterationrange[1])),
|
||||
iteration_end = box(as.integer(iterationrange[2])),
|
||||
ntree_limit = box(as.integer(ntreelimit)),
|
||||
type = box(as.integer(0))
|
||||
)
|
||||
|
||||
@@ -343,24 +343,24 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
)
|
||||
names(predts) <- c("shape", "results")
|
||||
shape <- predts$shape
|
||||
ret <- predts$results
|
||||
arr <- predts$results
|
||||
|
||||
n_ret <- length(ret)
|
||||
n_ret <- length(arr)
|
||||
n_row <- nrow(newdata)
|
||||
if (n_row != shape[1]) {
|
||||
stop("Incorrect predict shape.")
|
||||
}
|
||||
|
||||
arr <- array(data = ret, dim = rev(shape))
|
||||
.Call(XGSetArrayDimInplace_R, arr, rev(shape))
|
||||
|
||||
cnames <- if (!is.null(colnames(newdata))) c(colnames(newdata), "BIAS") else NULL
|
||||
n_groups <- shape[2]
|
||||
|
||||
## Needed regardless of whether strict shape is being used.
|
||||
if (predcontrib) {
|
||||
dimnames(arr) <- list(cnames, NULL, NULL)
|
||||
.Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, NULL, NULL))
|
||||
} else if (predinteraction) {
|
||||
dimnames(arr) <- list(cnames, cnames, NULL, NULL)
|
||||
.Call(XGSetArrayDimNamesInplace_R, arr, list(cnames, cnames, NULL, NULL))
|
||||
}
|
||||
if (strict_shape) {
|
||||
return(arr) # strict shape is calculated by libxgboost uniformly.
|
||||
@@ -368,43 +368,51 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
|
||||
|
||||
if (predleaf) {
|
||||
## Predict leaf
|
||||
arr <- if (n_ret == n_row) {
|
||||
matrix(arr, ncol = 1)
|
||||
if (n_ret == n_row) {
|
||||
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
|
||||
} else {
|
||||
matrix(arr, nrow = n_row, byrow = TRUE)
|
||||
arr <- matrix(arr, nrow = n_row, byrow = TRUE)
|
||||
}
|
||||
} else if (predcontrib) {
|
||||
## Predict contribution
|
||||
arr <- aperm(a = arr, perm = c(2, 3, 1)) # [group, row, col]
|
||||
arr <- if (n_ret == n_row) {
|
||||
matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
|
||||
if (n_ret == n_row) {
|
||||
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
|
||||
.Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
|
||||
} else if (n_groups != 1) {
|
||||
## turns array into list of matrices
|
||||
lapply(seq_len(n_groups), function(g) arr[g, , ])
|
||||
arr <- lapply(seq_len(n_groups), function(g) arr[g, , ])
|
||||
} else {
|
||||
## remove the first axis (group)
|
||||
dn <- dimnames(arr)
|
||||
matrix(arr[1, , ], nrow = dim(arr)[2], ncol = dim(arr)[3], dimnames = c(dn[2], dn[3]))
|
||||
newdim <- dim(arr)[2:3]
|
||||
newdn <- dimnames(arr)[2:3]
|
||||
arr <- arr[1, , ]
|
||||
.Call(XGSetArrayDimInplace_R, arr, newdim)
|
||||
.Call(XGSetArrayDimNamesInplace_R, arr, newdn)
|
||||
}
|
||||
} else if (predinteraction) {
|
||||
## Predict interaction
|
||||
arr <- aperm(a = arr, perm = c(3, 4, 1, 2)) # [group, row, col, col]
|
||||
arr <- if (n_ret == n_row) {
|
||||
matrix(arr, ncol = 1, dimnames = list(NULL, cnames))
|
||||
if (n_ret == n_row) {
|
||||
.Call(XGSetArrayDimInplace_R, arr, c(n_row, 1L))
|
||||
.Call(XGSetArrayDimNamesInplace_R, arr, list(NULL, cnames))
|
||||
} else if (n_groups != 1) {
|
||||
## turns array into list of matrices
|
||||
lapply(seq_len(n_groups), function(g) arr[g, , , ])
|
||||
arr <- lapply(seq_len(n_groups), function(g) arr[g, , , ])
|
||||
} else {
|
||||
## remove the first axis (group)
|
||||
arr <- arr[1, , , , drop = FALSE]
|
||||
array(arr, dim = dim(arr)[2:4], dimnames(arr)[2:4])
|
||||
newdim <- dim(arr)[2:4]
|
||||
newdn <- dimnames(arr)[2:4]
|
||||
.Call(XGSetArrayDimInplace_R, arr, newdim)
|
||||
.Call(XGSetArrayDimNamesInplace_R, arr, newdn)
|
||||
}
|
||||
} else {
|
||||
## Normal prediction
|
||||
arr <- if (reshape && n_groups != 1) {
|
||||
matrix(arr, ncol = n_groups, byrow = TRUE)
|
||||
if (reshape && n_groups != 1) {
|
||||
arr <- matrix(arr, ncol = n_groups, byrow = TRUE)
|
||||
} else {
|
||||
as.vector(ret)
|
||||
.Call(XGSetArrayDimInplace_R, arr, NULL)
|
||||
}
|
||||
}
|
||||
return(arr)
|
||||
@@ -492,7 +500,7 @@ xgb.attr <- function(object, name) {
|
||||
return(NULL)
|
||||
}
|
||||
if (!is.null(out)) {
|
||||
if (name %in% c("best_iteration", "best_ntreelimit", "best_score")) {
|
||||
if (name %in% c("best_iteration", "best_score")) {
|
||||
out <- as.numeric(out)
|
||||
}
|
||||
}
|
||||
@@ -685,16 +693,94 @@ setinfo.xgb.Booster <- function(object, name, info) {
|
||||
}
|
||||
|
||||
#' @title Get number of boosting in a fitted booster
|
||||
#' @param model A fitted `xgb.Booster` model.
|
||||
#' @param model,x A fitted `xgb.Booster` model.
|
||||
#' @return The number of rounds saved in the model, as an integer.
|
||||
#' @details Note that setting booster parameters related to training
|
||||
#' continuation / updates through \link{xgb.parameters<-} will reset the
|
||||
#' number of rounds to zero.
|
||||
#' @export
|
||||
#' @rdname xgb.get.num.boosted.rounds
|
||||
xgb.get.num.boosted.rounds <- function(model) {
|
||||
return(.Call(XGBoosterBoostedRounds_R, xgb.get.handle(model)))
|
||||
}
|
||||
|
||||
#' @rdname xgb.get.num.boosted.rounds
|
||||
#' @export
|
||||
length.xgb.Booster <- function(x) {
|
||||
return(xgb.get.num.boosted.rounds(x))
|
||||
}
|
||||
|
||||
#' @title Slice Booster by Rounds
|
||||
#' @description Creates a new booster including only a selected range of rounds / iterations
|
||||
#' from an existing booster, as given by the sequence `seq(start, end, step)`.
|
||||
#' @details Note that any R attributes that the booster might have, will not be copied into
|
||||
#' the resulting object.
|
||||
#' @param model,x A fitted `xgb.Booster` object, which is to be sliced by taking only a subset
|
||||
#' of its rounds / iterations.
|
||||
#' @param start Start of the slice (base-1 and inclusive, like R's \link{seq}).
|
||||
#' @param end End of the slice (base-1 and inclusive, like R's \link{seq}).
|
||||
#'
|
||||
#' Passing a value of zero here is equivalent to passing the full number of rounds in the
|
||||
#' booster object.
|
||||
#' @param step Step size of the slice. Passing '1' will take every round in the sequence defined by
|
||||
#' `(start, end)`, while passing '2' will take every second value, and so on.
|
||||
#' @return A sliced booster object containing only the requested rounds.
|
||||
#' @examples
|
||||
#' data(mtcars)
|
||||
#' y <- mtcars$mpg
|
||||
#' x <- as.matrix(mtcars[, -1])
|
||||
#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
#' model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
|
||||
#' model_slice <- xgb.slice.Booster(model, 1, 3)
|
||||
#' # Prediction for first three rounds
|
||||
#' predict(model, x, predleaf = TRUE)[, 1:3]
|
||||
#'
|
||||
#' # The new model has only those rounds, so
|
||||
#' # a full prediction from it is equivalent
|
||||
#' predict(model_slice, x, predleaf = TRUE)
|
||||
#' @export
|
||||
#' @rdname xgb.slice.Booster
|
||||
xgb.slice.Booster <- function(model, start, end = xgb.get.num.boosted.rounds(model), step = 1L) {
|
||||
# This makes the slice mimic the behavior of R's 'seq',
|
||||
# which truncates on the end of the slice when the step
|
||||
# doesn't reach it.
|
||||
if (end > start && step > 1) {
|
||||
d <- (end - start + 1) / step
|
||||
if (d != floor(d)) {
|
||||
end <- start + step * ceiling(d) - 1
|
||||
}
|
||||
}
|
||||
return(
|
||||
.Call(
|
||||
XGBoosterSlice_R,
|
||||
xgb.get.handle(model),
|
||||
start - 1,
|
||||
end,
|
||||
step
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
#' @export
|
||||
#' @rdname xgb.slice.Booster
|
||||
#' @param i The indices - must be an increasing sequence as generated by e.g. `seq(...)`.
|
||||
`[.xgb.Booster` <- function(x, i) {
|
||||
if (missing(i)) {
|
||||
return(xgb.slice.Booster(x, 1, 0))
|
||||
}
|
||||
if (length(i) == 1) {
|
||||
return(xgb.slice.Booster(x, i, i))
|
||||
}
|
||||
steps <- diff(i)
|
||||
if (any(steps < 0)) {
|
||||
stop("Can only slice booster with ascending sequences.")
|
||||
}
|
||||
if (length(unique(steps)) > 1) {
|
||||
stop("Can only slice booster with fixed-step sequences.")
|
||||
}
|
||||
return(xgb.slice.Booster(x, i[1L], i[length(i)], steps[1L]))
|
||||
}
|
||||
|
||||
#' @title Get Features Names from Booster
|
||||
#' @description Returns the feature / variable / column names from a fitted
|
||||
#' booster object, which are set automatically during the call to \link{xgb.train}
|
||||
@@ -710,12 +796,6 @@ variable.names.xgb.Booster <- function(object, ...) {
|
||||
return(getinfo(object, "feature_name"))
|
||||
}
|
||||
|
||||
xgb.ntree <- function(bst) {
|
||||
config <- xgb.config(bst)
|
||||
out <- strtoi(config$learner$gradient_booster$gbtree_model_param$num_trees)
|
||||
return(out)
|
||||
}
|
||||
|
||||
xgb.nthread <- function(bst) {
|
||||
config <- xgb.config(bst)
|
||||
out <- strtoi(config$learner$generic_param$nthread)
|
||||
|
||||
@@ -103,7 +103,6 @@
|
||||
#' parameter or randomly generated.
|
||||
#' \item \code{best_iteration} iteration number with the best evaluation metric value
|
||||
#' (only available with early stopping).
|
||||
#' \item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead.
|
||||
#' \item \code{pred} CV prediction values available when \code{prediction} is set.
|
||||
#' It is either vector or matrix (see \code{\link{cb.cv.predict}}).
|
||||
#' \item \code{models} a list of the CV folds' models. It is only available with the explicit
|
||||
@@ -218,7 +217,6 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
|
||||
|
||||
# extract parameters that can affect the relationship b/w #trees and #iterations
|
||||
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
|
||||
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
|
||||
|
||||
# those are fixed for CV (no training continuation)
|
||||
begin_iteration <- 1
|
||||
@@ -318,7 +316,7 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
|
||||
})
|
||||
}
|
||||
|
||||
for (n in c('niter', 'best_iteration', 'best_ntreelimit')) {
|
||||
for (n in c('niter', 'best_iteration')) {
|
||||
if (is.null(x[[n]]))
|
||||
next
|
||||
cat(n, ': ', x[[n]], '\n', sep = '')
|
||||
|
||||
@@ -113,19 +113,12 @@
|
||||
#' xgb.importance(model = mbst)
|
||||
#'
|
||||
#' @export
|
||||
xgb.importance <- function(feature_names = NULL, model = NULL, trees = NULL,
|
||||
xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature_name"), trees = NULL,
|
||||
data = NULL, label = NULL, target = NULL) {
|
||||
|
||||
if (!(is.null(data) && is.null(label) && is.null(target)))
|
||||
warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated")
|
||||
|
||||
if (is.null(feature_names)) {
|
||||
model_feature_names <- xgb.feature_names(model)
|
||||
if (NROW(model_feature_names)) {
|
||||
feature_names <- model_feature_names
|
||||
}
|
||||
}
|
||||
|
||||
if (!(is.null(feature_names) || is.character(feature_names)))
|
||||
stop("feature_names: Has to be a character vector")
|
||||
|
||||
|
||||
@@ -2,11 +2,8 @@
|
||||
#'
|
||||
#' Parse a boosted tree model text dump into a `data.table` structure.
|
||||
#'
|
||||
#' @param feature_names Character vector of feature names. If the model already
|
||||
#' contains feature names, those will be used when \code{feature_names=NULL} (default value).
|
||||
#'
|
||||
#' Note that, if the model already contains feature names, it's \bold{not} possible to override them here.
|
||||
#' @param model Object of class `xgb.Booster`.
|
||||
#' @param model Object of class `xgb.Booster`. If it contains feature names (they can be set through
|
||||
#' \link{setinfo}), they will be used in the output from this function.
|
||||
#' @param text Character vector previously generated by the function [xgb.dump()]
|
||||
#' (called with parameter `with_stats = TRUE`). `text` takes precedence over `model`.
|
||||
#' @param trees An integer vector of tree indices that should be used.
|
||||
@@ -58,7 +55,7 @@
|
||||
#'
|
||||
#' # This bst model already has feature_names stored with it, so those would be used when
|
||||
#' # feature_names is not set:
|
||||
#' (dt <- xgb.model.dt.tree(model = bst))
|
||||
#' dt <- xgb.model.dt.tree(bst)
|
||||
#'
|
||||
#' # How to match feature names of splits that are following a current 'Yes' branch:
|
||||
#' merge(
|
||||
@@ -69,7 +66,7 @@
|
||||
#' ]
|
||||
#'
|
||||
#' @export
|
||||
xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
xgb.model.dt.tree <- function(model = NULL, text = NULL,
|
||||
trees = NULL, use_int_id = FALSE, ...) {
|
||||
check.deprecation(...)
|
||||
|
||||
@@ -79,24 +76,15 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
" (or NULL if 'model' was provided).")
|
||||
}
|
||||
|
||||
model_feature_names <- NULL
|
||||
if (inherits(model, "xgb.Booster")) {
|
||||
model_feature_names <- xgb.feature_names(model)
|
||||
if (NROW(model_feature_names) && !is.null(feature_names)) {
|
||||
stop("'model' contains feature names. Cannot override them.")
|
||||
}
|
||||
}
|
||||
if (is.null(feature_names) && !is.null(model) && !is.null(model_feature_names))
|
||||
feature_names <- model_feature_names
|
||||
|
||||
if (!(is.null(feature_names) || is.character(feature_names))) {
|
||||
stop("feature_names: must be a character vector")
|
||||
}
|
||||
|
||||
if (!(is.null(trees) || is.numeric(trees))) {
|
||||
stop("trees: must be a vector of integers.")
|
||||
}
|
||||
|
||||
feature_names <- NULL
|
||||
if (inherits(model, "xgb.Booster")) {
|
||||
feature_names <- xgb.feature_names(model)
|
||||
}
|
||||
|
||||
from_text <- TRUE
|
||||
if (is.null(text)) {
|
||||
text <- xgb.dump(model = model, with_stats = TRUE)
|
||||
@@ -134,7 +122,7 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
branch_rx_w_names <- paste0("\\d+:\\[(.+)<(", anynumber_regex, ")\\] yes=(\\d+),no=(\\d+),missing=(\\d+),",
|
||||
"gain=(", anynumber_regex, "),cover=(", anynumber_regex, ")")
|
||||
text_has_feature_names <- FALSE
|
||||
if (NROW(model_feature_names)) {
|
||||
if (NROW(feature_names)) {
|
||||
branch_rx <- branch_rx_w_names
|
||||
text_has_feature_names <- TRUE
|
||||
} else {
|
||||
@@ -148,9 +136,6 @@ xgb.model.dt.tree <- function(feature_names = NULL, model = NULL, text = NULL,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (text_has_feature_names && is.null(model) && !is.null(feature_names)) {
|
||||
stop("'text' contains feature names. Cannot override them.")
|
||||
}
|
||||
branch_cols <- c("Feature", "Split", "Yes", "No", "Missing", "Gain", "Cover")
|
||||
td[
|
||||
isLeaf == FALSE,
|
||||
|
||||
@@ -62,13 +62,13 @@
|
||||
#' }
|
||||
#'
|
||||
#' @export
|
||||
xgb.plot.multi.trees <- function(model, feature_names = NULL, features_keep = 5, plot_width = NULL, plot_height = NULL,
|
||||
xgb.plot.multi.trees <- function(model, features_keep = 5, plot_width = NULL, plot_height = NULL,
|
||||
render = TRUE, ...) {
|
||||
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
|
||||
stop("DiagrammeR is required for xgb.plot.multi.trees")
|
||||
}
|
||||
check.deprecation(...)
|
||||
tree.matrix <- xgb.model.dt.tree(feature_names = feature_names, model = model)
|
||||
tree.matrix <- xgb.model.dt.tree(model = model)
|
||||
|
||||
# first number of the path represents the tree, then the following numbers are related to the path to follow
|
||||
# root init
|
||||
|
||||
@@ -2,9 +2,8 @@
|
||||
#'
|
||||
#' Read a tree model text dump and plot the model.
|
||||
#'
|
||||
#' @param feature_names Character vector used to overwrite the feature names
|
||||
#' of the model. The default (`NULL`) uses the original feature names.
|
||||
#' @param model Object of class `xgb.Booster`.
|
||||
#' @param model Object of class `xgb.Booster`. If it contains feature names (they can be set through
|
||||
#' \link{setinfo}), they will be used in the output from this function.
|
||||
#' @param trees An integer vector of tree indices that should be used.
|
||||
#' The default (`NULL`) uses all trees.
|
||||
#' Useful, e.g., in multiclass classification to get only
|
||||
@@ -103,7 +102,7 @@
|
||||
#' }
|
||||
#'
|
||||
#' @export
|
||||
xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL,
|
||||
xgb.plot.tree <- function(model = NULL, trees = NULL, plot_width = NULL, plot_height = NULL,
|
||||
render = TRUE, show_node_id = FALSE, style = c("R", "xgboost"), ...) {
|
||||
check.deprecation(...)
|
||||
if (!inherits(model, "xgb.Booster")) {
|
||||
@@ -120,17 +119,12 @@ xgb.plot.tree <- function(feature_names = NULL, model = NULL, trees = NULL, plot
|
||||
if (NROW(trees) != 1L || !render || show_node_id) {
|
||||
stop("style='xgboost' is only supported for single, rendered tree, without node IDs.")
|
||||
}
|
||||
if (!is.null(feature_names)) {
|
||||
stop(
|
||||
"style='xgboost' cannot override 'feature_names'. Will automatically take them from the model."
|
||||
)
|
||||
}
|
||||
|
||||
txt <- xgb.dump(model, dump_format = "dot")
|
||||
return(DiagrammeR::grViz(txt[[trees + 1]], width = plot_width, height = plot_height))
|
||||
}
|
||||
|
||||
dt <- xgb.model.dt.tree(feature_names = feature_names, model = model, trees = trees)
|
||||
dt <- xgb.model.dt.tree(model = model, trees = trees)
|
||||
|
||||
dt[, label := paste0(Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "\nValue: ", "\nGain: "), Gain)]
|
||||
if (show_node_id)
|
||||
|
||||
@@ -393,7 +393,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
|
||||
# Note: it might look like these aren't used, but they need to be defined in this
|
||||
# environment for the callbacks for work correctly.
|
||||
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
|
||||
num_parallel_tree <- max(as.numeric(NVL(params[['num_parallel_tree']], 1)), 1) # nolint
|
||||
|
||||
if (is_update && nrounds > niter_init)
|
||||
stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
|
||||
|
||||
@@ -15,7 +15,7 @@ cat('start testing prediction from first n trees\n')
|
||||
labels <- getinfo(dtest, 'label')
|
||||
|
||||
### predict using first 1 tree
|
||||
ypred1 <- predict(bst, dtest, ntreelimit = 1)
|
||||
ypred1 <- predict(bst, dtest, iterationrange = c(1, 1))
|
||||
# by default, we predict using all the trees
|
||||
ypred2 <- predict(bst, dtest)
|
||||
|
||||
|
||||
@@ -35,8 +35,6 @@ Callback function expects the following values to be set in its calling frame:
|
||||
\code{data},
|
||||
\code{end_iteration},
|
||||
\code{params},
|
||||
\code{num_parallel_tree},
|
||||
\code{num_class}.
|
||||
}
|
||||
\seealso{
|
||||
\code{\link{callbacks}}
|
||||
|
||||
@@ -55,7 +55,6 @@ Callback function expects the following values to be set in its calling frame:
|
||||
\code{iteration},
|
||||
\code{begin_iteration},
|
||||
\code{end_iteration},
|
||||
\code{num_parallel_tree}.
|
||||
}
|
||||
\seealso{
|
||||
\code{\link{callbacks}},
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
newdata,
|
||||
missing = NA,
|
||||
outputmargin = FALSE,
|
||||
ntreelimit = NULL,
|
||||
predleaf = FALSE,
|
||||
predcontrib = FALSE,
|
||||
approxcontrib = FALSE,
|
||||
@@ -36,8 +35,6 @@ missing values in data (e.g., 0 or some other extreme value).}
|
||||
sum of predictions from boosting iterations' results. E.g., setting \code{outputmargin=TRUE} for
|
||||
logistic regression would return log-odds instead of probabilities.}
|
||||
|
||||
\item{ntreelimit}{Deprecated, use \code{iterationrange} instead.}
|
||||
|
||||
\item{predleaf}{Whether to predict pre-tree leaf indices.}
|
||||
|
||||
\item{predcontrib}{Whether to return feature contributions to individual predictions (see Details).}
|
||||
@@ -53,11 +50,18 @@ or \code{predinteraction} is \code{TRUE}.}
|
||||
\item{training}{Whether the predictions are used for training. For dart booster,
|
||||
training predicting will perform dropout.}
|
||||
|
||||
\item{iterationrange}{Specifies which trees are used in prediction. For
|
||||
example, take a random forest with 100 rounds.
|
||||
With \code{iterationrange=c(1, 21)}, only the trees built during \verb{[1, 21)} (half open set)
|
||||
rounds are used in this prediction. The index is 1-based just like an R vector. When set
|
||||
to \code{c(1, 1)}, XGBoost will use all trees.}
|
||||
\item{iterationrange}{Sequence of rounds/iterations from the model to use for prediction, specified by passing
|
||||
a two-dimensional vector with the start and end numbers in the sequence (same format as R's \code{seq} - i.e.
|
||||
base-1 indexing, and inclusive of both ends).
|
||||
|
||||
\if{html}{\out{<div class="sourceCode">}}\preformatted{ For example, passing `c(1,20)` will predict using the first twenty iterations, while passing `c(1,1)` will
|
||||
predict using only the first one.
|
||||
|
||||
If passing `NULL`, will either stop at the best iteration if the model used early stopping, or use all
|
||||
of the iterations (rounds) otherwise.
|
||||
|
||||
If passing "all", will use all of the rounds regardless of whether the model had early stopping or not.
|
||||
}\if{html}{\out{</div>}}}
|
||||
|
||||
\item{strict_shape}{Default is \code{FALSE}. When set to \code{TRUE}, the output
|
||||
type and shape of predictions are invariant to the model type.}
|
||||
@@ -145,7 +149,7 @@ bst <- xgb.train(
|
||||
# use all trees by default
|
||||
pred <- predict(bst, test$data)
|
||||
# use only the 1st tree
|
||||
pred1 <- predict(bst, test$data, iterationrange = c(1, 2))
|
||||
pred1 <- predict(bst, test$data, iterationrange = c(1, 1))
|
||||
|
||||
# Predicting tree leafs:
|
||||
# the result is an nsamples X ntrees matrix
|
||||
@@ -216,7 +220,7 @@ str(pred)
|
||||
all.equal(pred, pred_labels)
|
||||
# prediction from using only 5 iterations should result
|
||||
# in the same error as seen in iteration 5:
|
||||
pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 6))
|
||||
pred5 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 5))
|
||||
sum(pred5 != lb) / length(lb)
|
||||
|
||||
}
|
||||
|
||||
@@ -135,7 +135,6 @@ It is created by the \code{\link{cb.evaluation.log}} callback.
|
||||
parameter or randomly generated.
|
||||
\item \code{best_iteration} iteration number with the best evaluation metric value
|
||||
(only available with early stopping).
|
||||
\item \code{best_ntreelimit} and the \code{ntreelimit} Deprecated attributes, use \code{best_iteration} instead.
|
||||
\item \code{pred} CV prediction values available when \code{prediction} is set.
|
||||
It is either vector or matrix (see \code{\link{cb.cv.predict}}).
|
||||
\item \code{models} a list of the CV folds' models. It is only available with the explicit
|
||||
|
||||
@@ -2,12 +2,15 @@
|
||||
% Please edit documentation in R/xgb.Booster.R
|
||||
\name{xgb.get.num.boosted.rounds}
|
||||
\alias{xgb.get.num.boosted.rounds}
|
||||
\alias{length.xgb.Booster}
|
||||
\title{Get number of boosting rounds in a fitted booster}
|
||||
\usage{
|
||||
xgb.get.num.boosted.rounds(model)
|
||||
|
||||
\method{length}{xgb.Booster}(x)
|
||||
}
|
||||
\arguments{
|
||||
\item{model}{A fitted \code{xgb.Booster} model.}
|
||||
\item{model, x}{A fitted \code{xgb.Booster} model.}
|
||||
}
|
||||
\value{
|
||||
The number of rounds saved in the model, as an integer.
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
\title{Feature importance}
|
||||
\usage{
|
||||
xgb.importance(
|
||||
feature_names = NULL,
|
||||
model = NULL,
|
||||
feature_names = getinfo(model, "feature_name"),
|
||||
trees = NULL,
|
||||
data = NULL,
|
||||
label = NULL,
|
||||
@@ -14,11 +14,11 @@ xgb.importance(
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{model}{Object of class \code{xgb.Booster}.}
|
||||
|
||||
\item{feature_names}{Character vector used to overwrite the feature names
|
||||
of the model. The default is \code{NULL} (use original feature names).}
|
||||
|
||||
\item{model}{Object of class \code{xgb.Booster}.}
|
||||
|
||||
\item{trees}{An integer vector of tree indices that should be included
|
||||
into the importance calculation (only for the "gbtree" booster).
|
||||
The default (\code{NULL}) parses all trees.
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
\title{Parse model text dump}
|
||||
\usage{
|
||||
xgb.model.dt.tree(
|
||||
feature_names = NULL,
|
||||
model = NULL,
|
||||
text = NULL,
|
||||
trees = NULL,
|
||||
@@ -14,13 +13,8 @@ xgb.model.dt.tree(
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{feature_names}{Character vector of feature names. If the model already
|
||||
contains feature names, those will be used when \code{feature_names=NULL} (default value).
|
||||
|
||||
\if{html}{\out{<div class="sourceCode">}}\preformatted{ Note that, if the model already contains feature names, it's \\bold\{not\} possible to override them here.
|
||||
}\if{html}{\out{</div>}}}
|
||||
|
||||
\item{model}{Object of class \code{xgb.Booster}.}
|
||||
\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can be set through
|
||||
\link{setinfo}), they will be used in the output from this function.}
|
||||
|
||||
\item{text}{Character vector previously generated by the function \code{\link[=xgb.dump]{xgb.dump()}}
|
||||
(called with parameter \code{with_stats = TRUE}). \code{text} takes precedence over \code{model}.}
|
||||
@@ -81,7 +75,7 @@ bst <- xgboost(
|
||||
|
||||
# This bst model already has feature_names stored with it, so those would be used when
|
||||
# feature_names is not set:
|
||||
(dt <- xgb.model.dt.tree(model = bst))
|
||||
dt <- xgb.model.dt.tree(bst)
|
||||
|
||||
# How to match feature names of splits that are following a current 'Yes' branch:
|
||||
merge(
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
\usage{
|
||||
xgb.plot.multi.trees(
|
||||
model,
|
||||
feature_names = NULL,
|
||||
features_keep = 5,
|
||||
plot_width = NULL,
|
||||
plot_height = NULL,
|
||||
@@ -15,10 +14,8 @@ xgb.plot.multi.trees(
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{model}{Object of class \code{xgb.Booster}.}
|
||||
|
||||
\item{feature_names}{Character vector used to overwrite the feature names
|
||||
of the model. The default (\code{NULL}) uses the original feature names.}
|
||||
\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can be set through
|
||||
\link{setinfo}), they will be used in the output from this function.}
|
||||
|
||||
\item{features_keep}{Number of features to keep in each position of the multi trees,
|
||||
by default 5.}
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
\title{Plot boosted trees}
|
||||
\usage{
|
||||
xgb.plot.tree(
|
||||
feature_names = NULL,
|
||||
model = NULL,
|
||||
trees = NULL,
|
||||
plot_width = NULL,
|
||||
@@ -17,10 +16,8 @@ xgb.plot.tree(
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
\item{feature_names}{Character vector used to overwrite the feature names
|
||||
of the model. The default (\code{NULL}) uses the original feature names.}
|
||||
|
||||
\item{model}{Object of class \code{xgb.Booster}.}
|
||||
\item{model}{Object of class \code{xgb.Booster}. If it contains feature names (they can be set through
|
||||
\link{setinfo}), they will be used in the output from this function.}
|
||||
|
||||
\item{trees}{An integer vector of tree indices that should be used.
|
||||
The default (\code{NULL}) uses all trees.
|
||||
|
||||
57
R-package/man/xgb.slice.Booster.Rd
Normal file
57
R-package/man/xgb.slice.Booster.Rd
Normal file
@@ -0,0 +1,57 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.Booster.R
|
||||
\name{xgb.slice.Booster}
|
||||
\alias{xgb.slice.Booster}
|
||||
\alias{[.xgb.Booster}
|
||||
\title{Slice Booster by Rounds}
|
||||
\usage{
|
||||
xgb.slice.Booster(
|
||||
model,
|
||||
start,
|
||||
end = xgb.get.num.boosted.rounds(model),
|
||||
step = 1L
|
||||
)
|
||||
|
||||
\method{[}{xgb.Booster}(x, i)
|
||||
}
|
||||
\arguments{
|
||||
\item{model, x}{A fitted \code{xgb.Booster} object, which is to be sliced by taking only a subset
|
||||
of its rounds / iterations.}
|
||||
|
||||
\item{start}{Start of the slice (base-1 and inclusive, like R's \link{seq}).}
|
||||
|
||||
\item{end}{End of the slice (base-1 and inclusive, like R's \link{seq}).
|
||||
|
||||
Passing a value of zero here is equivalent to passing the full number of rounds in the
|
||||
booster object.}
|
||||
|
||||
\item{step}{Step size of the slice. Passing '1' will take every round in the sequence defined by
|
||||
\verb{(start, end)}, while passing '2' will take every second value, and so on.}
|
||||
|
||||
\item{i}{The indices - must be an increasing sequence as generated by e.g. \code{seq(...)}.}
|
||||
}
|
||||
\value{
|
||||
A sliced booster object containing only the requested rounds.
|
||||
}
|
||||
\description{
|
||||
Creates a new booster including only a selected range of rounds / iterations
|
||||
from an existing booster, as given by the sequence \code{seq(start, end, step)}.
|
||||
}
|
||||
\details{
|
||||
Note that any R attributes that the booster might have, will not be copied into
|
||||
the resulting object.
|
||||
}
|
||||
\examples{
|
||||
data(mtcars)
|
||||
y <- mtcars$mpg
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
|
||||
model_slice <- xgb.slice.Booster(model, 1, 3)
|
||||
# Prediction for first three rounds
|
||||
predict(model, x, predleaf = TRUE)[, 1:3]
|
||||
|
||||
# The new model has only those rounds, so
|
||||
# a full prediction from it is equivalent
|
||||
predict(model_slice, x, predleaf = TRUE)
|
||||
}
|
||||
@@ -42,6 +42,8 @@ extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);
|
||||
extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);
|
||||
extern SEXP XGBoosterUpdateOneIter_R(SEXP, SEXP, SEXP);
|
||||
extern SEXP XGCheckNullPtr_R(SEXP);
|
||||
extern SEXP XGSetArrayDimInplace_R(SEXP, SEXP);
|
||||
extern SEXP XGSetArrayDimNamesInplace_R(SEXP, SEXP);
|
||||
extern SEXP XGDMatrixCreateFromCSC_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
|
||||
extern SEXP XGDMatrixCreateFromCSR_R(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
|
||||
extern SEXP XGDMatrixCreateFromFile_R(SEXP, SEXP);
|
||||
@@ -62,6 +64,7 @@ extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP);
|
||||
extern SEXP XGBSetGlobalConfig_R(SEXP);
|
||||
extern SEXP XGBGetGlobalConfig_R(void);
|
||||
extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);
|
||||
extern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP);
|
||||
|
||||
static const R_CallMethodDef CallEntries[] = {
|
||||
{"XGDuplicate_R", (DL_FUNC) &XGDuplicate_R, 1},
|
||||
@@ -90,6 +93,8 @@ static const R_CallMethodDef CallEntries[] = {
|
||||
{"XGBoosterSetParam_R", (DL_FUNC) &XGBoosterSetParam_R, 3},
|
||||
{"XGBoosterUpdateOneIter_R", (DL_FUNC) &XGBoosterUpdateOneIter_R, 3},
|
||||
{"XGCheckNullPtr_R", (DL_FUNC) &XGCheckNullPtr_R, 1},
|
||||
{"XGSetArrayDimInplace_R", (DL_FUNC) &XGSetArrayDimInplace_R, 2},
|
||||
{"XGSetArrayDimNamesInplace_R", (DL_FUNC) &XGSetArrayDimNamesInplace_R, 2},
|
||||
{"XGDMatrixCreateFromCSC_R", (DL_FUNC) &XGDMatrixCreateFromCSC_R, 6},
|
||||
{"XGDMatrixCreateFromCSR_R", (DL_FUNC) &XGDMatrixCreateFromCSR_R, 6},
|
||||
{"XGDMatrixCreateFromFile_R", (DL_FUNC) &XGDMatrixCreateFromFile_R, 2},
|
||||
@@ -110,6 +115,7 @@ static const R_CallMethodDef CallEntries[] = {
|
||||
{"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1},
|
||||
{"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0},
|
||||
{"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2},
|
||||
{"XGBoosterSlice_R", (DL_FUNC) &XGBoosterSlice_R, 4},
|
||||
{NULL, NULL, 0}
|
||||
};
|
||||
|
||||
|
||||
@@ -263,6 +263,16 @@ XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle) {
|
||||
return Rf_ScalarLogical(R_ExternalPtrAddr(handle) == nullptr);
|
||||
}
|
||||
|
||||
XGB_DLL SEXP XGSetArrayDimInplace_R(SEXP arr, SEXP dims) {
|
||||
Rf_setAttrib(arr, R_DimSymbol, dims);
|
||||
return R_NilValue;
|
||||
}
|
||||
|
||||
XGB_DLL SEXP XGSetArrayDimNamesInplace_R(SEXP arr, SEXP dim_names) {
|
||||
Rf_setAttrib(arr, R_DimNamesSymbol, dim_names);
|
||||
return R_NilValue;
|
||||
}
|
||||
|
||||
namespace {
|
||||
void _DMatrixFinalizer(SEXP ext) {
|
||||
R_API_BEGIN();
|
||||
@@ -1279,3 +1289,18 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {
|
||||
|
||||
return r_out;
|
||||
}
|
||||
|
||||
XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) {
|
||||
SEXP out = Rf_protect(XGBMakeEmptyAltrep());
|
||||
R_API_BEGIN();
|
||||
BoosterHandle handle_out = nullptr;
|
||||
CHECK_CALL(XGBoosterSlice(R_ExternalPtrAddr(handle),
|
||||
Rf_asInteger(begin_layer),
|
||||
Rf_asInteger(end_layer),
|
||||
Rf_asInteger(step),
|
||||
&handle_out));
|
||||
XGBAltrepSetPointer(out, handle_out);
|
||||
R_API_END();
|
||||
Rf_unprotect(1);
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -23,6 +23,22 @@
|
||||
*/
|
||||
XGB_DLL SEXP XGCheckNullPtr_R(SEXP handle);
|
||||
|
||||
/*!
|
||||
* \brief set the dimensions of an array in-place
|
||||
* \param arr
|
||||
* \param dims dimensions to set to the array
|
||||
* \return NULL value
|
||||
*/
|
||||
XGB_DLL SEXP XGSetArrayDimInplace_R(SEXP arr, SEXP dims);
|
||||
|
||||
/*!
|
||||
* \brief set the names of the dimensions of an array in-place
|
||||
* \param arr
|
||||
* \param dim_names names for the dimensions to set
|
||||
* \return NULL value
|
||||
*/
|
||||
XGB_DLL SEXP XGSetArrayDimNamesInplace_R(SEXP arr, SEXP dim_names);
|
||||
|
||||
/*!
|
||||
* \brief Set global configuration
|
||||
* \param json_str a JSON string representing the list of key-value pairs
|
||||
@@ -386,4 +402,14 @@ XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle);
|
||||
*/
|
||||
XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
|
||||
|
||||
/*!
|
||||
* \brief Slice a fitted booster model (by rounds)
|
||||
* \param handle handle to the fitted booster
|
||||
* \param begin_layer start of the slice
|
||||
 * \param end_layer end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round
|
||||
* \param step step size of the slice
|
||||
* \return The sliced booster with the requested rounds only
|
||||
*/
|
||||
XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
|
||||
|
||||
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)
|
||||
|
||||
@@ -33,15 +33,11 @@ test_that("train and predict binary classification", {
|
||||
pred <- predict(bst, test$data)
|
||||
expect_length(pred, 1611)
|
||||
|
||||
pred1 <- predict(bst, train$data, ntreelimit = 1)
|
||||
pred1 <- predict(bst, train$data, iterationrange = c(1, 1))
|
||||
expect_length(pred1, 6513)
|
||||
err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)
|
||||
err_log <- attributes(bst)$evaluation_log[1, train_error]
|
||||
expect_lt(abs(err_pred1 - err_log), 10e-6)
|
||||
|
||||
pred2 <- predict(bst, train$data, iterationrange = c(1, 2))
|
||||
expect_length(pred1, 6513)
|
||||
expect_equal(pred1, pred2)
|
||||
})
|
||||
|
||||
test_that("parameter validation works", {
|
||||
@@ -117,8 +113,8 @@ test_that("dart prediction works", {
|
||||
nrounds = nrounds,
|
||||
objective = "reg:squarederror"
|
||||
)
|
||||
pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, ntreelimit = 0)
|
||||
pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, ntreelimit = nrounds)
|
||||
pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, iterationrange = NULL)
|
||||
pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, iterationrange = c(1, nrounds))
|
||||
expect_true(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_1, byrow = TRUE)))
|
||||
|
||||
pred_by_xgboost_2 <- predict(booster_by_xgboost, newdata = d, training = TRUE)
|
||||
@@ -139,8 +135,8 @@ test_that("dart prediction works", {
|
||||
data = dtrain,
|
||||
nrounds = nrounds
|
||||
)
|
||||
pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, ntreelimit = 0)
|
||||
pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, ntreelimit = nrounds)
|
||||
pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, iterationrange = NULL)
|
||||
pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, iterationrange = c(1, nrounds))
|
||||
pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)
|
||||
|
||||
expect_true(all(matrix(pred_by_train_0, byrow = TRUE) == matrix(pred_by_xgboost_0, byrow = TRUE)))
|
||||
@@ -162,7 +158,7 @@ test_that("train and predict softprob", {
|
||||
)
|
||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||
expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 5)
|
||||
pred <- predict(bst, as.matrix(iris[, -5]))
|
||||
expect_length(pred, nrow(iris) * 3)
|
||||
# row sums add up to total probability of 1:
|
||||
@@ -174,12 +170,12 @@ test_that("train and predict softprob", {
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6)
|
||||
# manually calculate error at the 1st iteration:
|
||||
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 1)
|
||||
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
|
||||
pred_labels <- max.col(mpred) - 1
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[1, train_merror], err, tolerance = 5e-6)
|
||||
|
||||
mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 2))
|
||||
mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
|
||||
expect_equal(mpred, mpred1)
|
||||
|
||||
d <- cbind(
|
||||
@@ -213,7 +209,7 @@ test_that("train and predict softmax", {
|
||||
)
|
||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||
expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 5)
|
||||
|
||||
pred <- predict(bst, as.matrix(iris[, -5]))
|
||||
expect_length(pred, nrow(iris))
|
||||
@@ -233,19 +229,15 @@ test_that("train and predict RF", {
|
||||
watchlist = list(train = xgb.DMatrix(train$data, label = lb))
|
||||
)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 1)
|
||||
expect_equal(xgb.ntree(bst), 20)
|
||||
|
||||
pred <- predict(bst, train$data)
|
||||
pred_err <- sum((pred > 0.5) != lb) / length(lb)
|
||||
expect_lt(abs(attributes(bst)$evaluation_log[1, train_error] - pred_err), 10e-6)
|
||||
# expect_lt(pred_err, 0.03)
|
||||
|
||||
pred <- predict(bst, train$data, ntreelimit = 20)
|
||||
pred <- predict(bst, train$data, iterationrange = c(1, 1))
|
||||
pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
|
||||
expect_equal(pred_err_20, pred_err)
|
||||
|
||||
pred1 <- predict(bst, train$data, iterationrange = c(1, 2))
|
||||
expect_equal(pred, pred1)
|
||||
})
|
||||
|
||||
test_that("train and predict RF with softprob", {
|
||||
@@ -261,7 +253,6 @@ test_that("train and predict RF with softprob", {
|
||||
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
|
||||
)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 15)
|
||||
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
|
||||
# predict for all iterations:
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
|
||||
expect_equal(dim(pred), c(nrow(iris), 3))
|
||||
@@ -269,7 +260,7 @@ test_that("train and predict RF with softprob", {
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
|
||||
# predict for 7 iterations and adjust for 4 parallel trees per iteration
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 7 * 4)
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 7))
|
||||
err <- sum((max.col(pred) - 1) != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[7, train_merror], err, tolerance = 5e-6)
|
||||
})
|
||||
|
||||
67
R-package/tests/testthat/test_booster_slicing.R
Normal file
67
R-package/tests/testthat/test_booster_slicing.R
Normal file
@@ -0,0 +1,67 @@
|
||||
context("testing xgb.Booster slicing")
|
||||
|
||||
data(agaricus.train, package = "xgboost")
|
||||
dm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
|
||||
# Note: here need large step sizes in order for the predictions
|
||||
# to have substantially different leaf assignments on each tree
|
||||
model <- xgb.train(
|
||||
params = list(objective = "binary:logistic", nthread = 1, max_depth = 4, eta = 0.5),
|
||||
data = dm,
|
||||
nrounds = 20
|
||||
)
|
||||
pred <- predict(model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
|
||||
test_that("Slicing full model", {
|
||||
new_model <- xgb.slice.Booster(model, 1, 0)
|
||||
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
|
||||
|
||||
new_model <- model[]
|
||||
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
|
||||
|
||||
new_model <- model[1:length(model)] # nolint
|
||||
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
|
||||
})
|
||||
|
||||
test_that("Slicing sequence from start", {
|
||||
new_model <- xgb.slice.Booster(model, 1, 10)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(1, 10)])
|
||||
|
||||
new_model <- model[1:10]
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(1, 10)])
|
||||
})
|
||||
|
||||
test_that("Slicing sequence from middle", {
|
||||
new_model <- xgb.slice.Booster(model, 5, 10)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(5, 10)])
|
||||
|
||||
new_model <- model[5:10]
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(5, 10)])
|
||||
})
|
||||
|
||||
test_that("Slicing with non-unit step", {
|
||||
for (s in 2:5) {
|
||||
new_model <- xgb.slice.Booster(model, 1, 17, s)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(1, 17, s)])
|
||||
|
||||
new_model <- model[seq(1, 17, s)]
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(1, 17, s)])
|
||||
}
|
||||
})
|
||||
|
||||
test_that("Slicing with non-unit step from middle", {
|
||||
for (s in 2:5) {
|
||||
new_model <- xgb.slice.Booster(model, 4, 17, s)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(4, 17, s)])
|
||||
|
||||
new_model <- model[seq(4, 17, s)]
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(4, 17, s)])
|
||||
}
|
||||
})
|
||||
@@ -211,12 +211,11 @@ test_that("early stopping xgb.train works", {
|
||||
, "Stopping. Best iteration")
|
||||
expect_false(is.null(xgb.attr(bst, "best_iteration")))
|
||||
expect_lt(xgb.attr(bst, "best_iteration"), 19)
|
||||
expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
|
||||
|
||||
pred <- predict(bst, dtest)
|
||||
expect_equal(length(pred), 1611)
|
||||
err_pred <- err(ltest, pred)
|
||||
err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_error]
|
||||
err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, test_error]
|
||||
expect_equal(err_log, err_pred, tolerance = 5e-6)
|
||||
|
||||
set.seed(11)
|
||||
@@ -231,8 +230,7 @@ test_that("early stopping xgb.train works", {
|
||||
loaded <- xgb.load(fname)
|
||||
|
||||
expect_false(is.null(xgb.attr(loaded, "best_iteration")))
|
||||
expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
|
||||
expect_equal(xgb.attr(loaded, "best_ntreelimit"), xgb.attr(bst, "best_ntreelimit"))
|
||||
expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_iteration"))
|
||||
})
|
||||
|
||||
test_that("early stopping using a specific metric works", {
|
||||
@@ -245,12 +243,11 @@ test_that("early stopping using a specific metric works", {
|
||||
, "Stopping. Best iteration")
|
||||
expect_false(is.null(xgb.attr(bst, "best_iteration")))
|
||||
expect_lt(xgb.attr(bst, "best_iteration"), 19)
|
||||
expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
|
||||
|
||||
pred <- predict(bst, dtest, ntreelimit = xgb.attr(bst, "best_ntreelimit"))
|
||||
pred <- predict(bst, dtest, iterationrange = c(1, xgb.attr(bst, "best_iteration") + 1))
|
||||
expect_equal(length(pred), 1611)
|
||||
logloss_pred <- sum(-ltest * log(pred) - (1 - ltest) * log(1 - pred)) / length(ltest)
|
||||
logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_logloss]
|
||||
logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, test_logloss]
|
||||
expect_equal(logloss_log, logloss_pred, tolerance = 1e-5)
|
||||
})
|
||||
|
||||
@@ -286,7 +283,6 @@ test_that("early stopping xgb.cv works", {
|
||||
, "Stopping. Best iteration")
|
||||
expect_false(is.null(cv$best_iteration))
|
||||
expect_lt(cv$best_iteration, 19)
|
||||
expect_equal(cv$best_iteration, cv$best_ntreelimit)
|
||||
# the best error is min error:
|
||||
expect_true(cv$evaluation_log[, test_error_mean[cv$best_iteration] == min(test_error_mean)])
|
||||
})
|
||||
@@ -354,3 +350,44 @@ test_that("prediction in xgb.cv for softprob works", {
|
||||
expect_equal(dim(cv$pred), c(nrow(iris), 3))
|
||||
expect_lt(diff(range(rowSums(cv$pred))), 1e-6)
|
||||
})
|
||||
|
||||
test_that("prediction in xgb.cv works for multi-quantile", {
|
||||
data(mtcars)
|
||||
y <- mtcars$mpg
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
cv <- xgb.cv(
|
||||
data = dm,
|
||||
params = list(
|
||||
objective = "reg:quantileerror",
|
||||
quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9),
|
||||
nthread = 1
|
||||
),
|
||||
nrounds = 5,
|
||||
nfold = 3,
|
||||
prediction = TRUE,
|
||||
verbose = 0
|
||||
)
|
||||
expect_equal(dim(cv$pred), c(nrow(x), 5))
|
||||
})
|
||||
|
||||
test_that("prediction in xgb.cv works for multi-output", {
|
||||
data(mtcars)
|
||||
y <- mtcars$mpg
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1)
|
||||
cv <- xgb.cv(
|
||||
data = dm,
|
||||
params = list(
|
||||
tree_method = "hist",
|
||||
multi_strategy = "multi_output_tree",
|
||||
objective = "reg:squarederror",
|
||||
nthread = n_threads
|
||||
),
|
||||
nrounds = 5,
|
||||
nfold = 3,
|
||||
prediction = TRUE,
|
||||
verbose = 0
|
||||
)
|
||||
expect_equal(dim(cv$pred), c(nrow(x), 2))
|
||||
})
|
||||
|
||||
@@ -72,10 +72,10 @@ test_that("gblinear early stopping works", {
|
||||
booster <- xgb.train(
|
||||
param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
|
||||
)
|
||||
expect_equal(xgb.attr(booster, "best_iteration"), 5)
|
||||
expect_equal(xgb.attr(booster, "best_iteration"), 4)
|
||||
predt_es <- predict(booster, dtrain)
|
||||
|
||||
n <- xgb.attr(booster, "best_iteration") + es_round
|
||||
n <- xgb.attr(booster, "best_iteration") + es_round + 1
|
||||
booster <- xgb.train(
|
||||
param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
|
||||
)
|
||||
|
||||
@@ -282,9 +282,6 @@ test_that("xgb.model.dt.tree works with and without feature names", {
|
||||
expect_equal(dim(dt.tree), c(188, 10))
|
||||
expect_output(str(dt.tree), 'Feature.*\\"Age\\"')
|
||||
|
||||
dt.tree.0 <- xgb.model.dt.tree(model = bst.Tree)
|
||||
expect_equal(dt.tree, dt.tree.0)
|
||||
|
||||
# when model contains no feature names:
|
||||
dt.tree.x <- xgb.model.dt.tree(model = bst.Tree.unnamed)
|
||||
expect_output(str(dt.tree.x), 'Feature.*\\"3\\"')
|
||||
@@ -304,7 +301,7 @@ test_that("xgb.model.dt.tree throws error for gblinear", {
|
||||
|
||||
test_that("xgb.importance works with and without feature names", {
|
||||
.skip_if_vcd_not_available()
|
||||
importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree)
|
||||
importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree.unnamed)
|
||||
if (!flag_32bit)
|
||||
expect_equal(dim(importance.Tree), c(7, 4))
|
||||
expect_equal(colnames(importance.Tree), c("Feature", "Gain", "Cover", "Frequency"))
|
||||
@@ -330,9 +327,8 @@ test_that("xgb.importance works with and without feature names", {
|
||||
importance <- xgb.importance(feature_names = feature.names, model = bst.Tree, trees = trees)
|
||||
|
||||
importance_from_dump <- function() {
|
||||
model_text_dump <- xgb.dump(model = bst.Tree.unnamed, with_stats = TRUE, trees = trees)
|
||||
model_text_dump <- xgb.dump(model = bst.Tree, with_stats = TRUE, trees = trees)
|
||||
imp <- xgb.model.dt.tree(
|
||||
feature_names = feature.names,
|
||||
text = model_text_dump,
|
||||
trees = trees
|
||||
)[
|
||||
|
||||
@@ -44,7 +44,7 @@ test_that('Test ranking with weighted data', {
|
||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
|
||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
|
||||
for (i in 1:10) {
|
||||
pred <- predict(bst, newdata = dtrain, ntreelimit = i)
|
||||
pred <- predict(bst, newdata = dtrain, iterationrange = c(1, i))
|
||||
# is_sorted[i]: is i-th group correctly sorted by the ranking predictor?
|
||||
is_sorted <- lapply(seq(1, 20, by = 5),
|
||||
function(k) {
|
||||
|
||||
Reference in New Issue
Block a user