diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index a29c9b1e0..398b0da5a 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method("[",xgb.Booster) S3method("[",xgb.DMatrix) S3method("dimnames<-",xgb.DMatrix) S3method(coef,xgb.Booster) @@ -7,6 +8,7 @@ S3method(dim,xgb.DMatrix) S3method(dimnames,xgb.DMatrix) S3method(getinfo,xgb.Booster) S3method(getinfo,xgb.DMatrix) +S3method(length,xgb.Booster) S3method(predict,xgb.Booster) S3method(print,xgb.Booster) S3method(print,xgb.DMatrix) @@ -62,6 +64,7 @@ export(xgb.plot.tree) export(xgb.save) export(xgb.save.raw) export(xgb.set.config) +export(xgb.slice.Booster) export(xgb.train) export(xgboost) import(methods) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 5d8346abc..7613c9152 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -693,16 +693,94 @@ setinfo.xgb.Booster <- function(object, name, info) { } #' @title Get number of boosting in a fitted booster -#' @param model A fitted `xgb.Booster` model. +#' @param model,x A fitted `xgb.Booster` model. #' @return The number of rounds saved in the model, as an integer. #' @details Note that setting booster parameters related to training #' continuation / updates through \link{xgb.parameters<-} will reset the #' number of rounds to zero. #' @export +#' @rdname xgb.get.num.boosted.rounds xgb.get.num.boosted.rounds <- function(model) { return(.Call(XGBoosterBoostedRounds_R, xgb.get.handle(model))) } +#' @rdname xgb.get.num.boosted.rounds +#' @export +length.xgb.Booster <- function(x) { + return(xgb.get.num.boosted.rounds(x)) +} + +#' @title Slice Booster by Rounds +#' @description Creates a new booster including only a selected range of rounds / iterations +#' from an existing booster, as given by the sequence `seq(start, end, step)`. +#' @details Note that any R attributes that the booster might have, will not be copied into +#' the resulting object. +#' @param model,x A fitted `xgb.Booster` object, which is to be sliced by taking only a subset +#' of its rounds / iterations. +#' @param start Start of the slice (base-1 and inclusive, like R's \link{seq}). +#' @param end End of the slice (base-1 and inclusive, like R's \link{seq}). +#' +#' Passing a value of zero here is equivalent to passing the full number of rounds in the +#' booster object. +#' @param step Step size of the slice. Passing '1' will take every round in the sequence defined by +#' `(start, end)`, while passing '2' will take every second value, and so on. +#' @return A sliced booster object containing only the requested rounds. +#' @examples +#' data(mtcars) +#' y <- mtcars$mpg +#' x <- as.matrix(mtcars[, -1]) +#' dm <- xgb.DMatrix(x, label = y, nthread = 1) +#' model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5) +#' model_slice <- xgb.slice.Booster(model, 1, 3) +#' # Prediction for first three rounds +#' predict(model, x, predleaf = TRUE)[, 1:3] +#' +#' # The new model has only those rounds, so +#' # a full prediction from it is equivalent +#' predict(model_slice, x, predleaf = TRUE) +#' @export +#' @rdname xgb.slice.Booster +xgb.slice.Booster <- function(model, start, end = xgb.get.num.boosted.rounds(model), step = 1L) { + # This makes the slice mimic the behavior of R's 'seq', + # which truncates on the end of the slice when the step + # doesn't reach it. + if (end > start && step > 1) { + d <- (end - start + 1) / step + if (d != floor(d)) { + end <- start + step * ceiling(d) - 1 + } + } + return( + .Call( + XGBoosterSlice_R, + xgb.get.handle(model), + start - 1, + end, + step + ) + ) +} + +#' @export +#' @rdname xgb.slice.Booster +#' @param i The indices - must be an increasing sequence as generated by e.g. `seq(...)`. +`[.xgb.Booster` <- function(x, i) { + if (missing(i)) { + return(xgb.slice.Booster(x, 1, 0)) + } + if (length(i) == 1) { + return(xgb.slice.Booster(x, i, i)) + } + steps <- diff(i) + if (any(steps < 0)) { + stop("Can only slice booster with ascending sequences.") + } + if (length(unique(steps)) > 1) { + stop("Can only slice booster with fixed-step sequences.") + } + return(xgb.slice.Booster(x, i[1L], i[length(i)], steps[1L])) +} + #' @title Get Features Names from Booster #' @description Returns the feature / variable / column names from a fitted #' booster object, which are set automatically during the call to \link{xgb.train} diff --git a/R-package/man/xgb.get.num.boosted.rounds.Rd b/R-package/man/xgb.get.num.boosted.rounds.Rd index 74c94d95b..551dc4a83 100644 --- a/R-package/man/xgb.get.num.boosted.rounds.Rd +++ b/R-package/man/xgb.get.num.boosted.rounds.Rd @@ -2,12 +2,15 @@ % Please edit documentation in R/xgb.Booster.R \name{xgb.get.num.boosted.rounds} \alias{xgb.get.num.boosted.rounds} +\alias{length.xgb.Booster} \title{Get number of boosting in a fitted booster} \usage{ xgb.get.num.boosted.rounds(model) + +\method{length}{xgb.Booster}(x) } \arguments{ -\item{model}{A fitted \code{xgb.Booster} model.} +\item{model, x}{A fitted \code{xgb.Booster} model.} } \value{ The number of rounds saved in the model, as an integer. diff --git a/R-package/man/xgb.slice.Booster.Rd b/R-package/man/xgb.slice.Booster.Rd new file mode 100644 index 000000000..759139901 --- /dev/null +++ b/R-package/man/xgb.slice.Booster.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.Booster.R +\name{xgb.slice.Booster} +\alias{xgb.slice.Booster} +\alias{[.xgb.Booster} +\title{Slice Booster by Rounds} +\usage{ +xgb.slice.Booster( + model, + start, + end = xgb.get.num.boosted.rounds(model), + step = 1L +) + +\method{[}{xgb.Booster}(x, i) +} +\arguments{ +\item{model, x}{A fitted \code{xgb.Booster} object, which is to be sliced by taking only a subset +of its rounds / iterations.} + +\item{start}{Start of the slice (base-1 and inclusive, like R's \link{seq}).} + +\item{end}{End of the slice (base-1 and inclusive, like R's \link{seq}). + +Passing a value of zero here is equivalent to passing the full number of rounds in the +booster object.} + +\item{step}{Step size of the slice. Passing '1' will take every round in the sequence defined by +\verb{(start, end)}, while passing '2' will take every second value, and so on.} + +\item{i}{The indices - must be an increasing sequence as generated by e.g. \code{seq(...)}.} +} +\value{ +A sliced booster object containing only the requested rounds. +} +\description{ +Creates a new booster including only a selected range of rounds / iterations +from an existing booster, as given by the sequence \code{seq(start, end, step)}. +} +\details{ +Note that any R attributes that the booster might have, will not be copied into +the resulting object. +} +\examples{ +data(mtcars) +y <- mtcars$mpg +x <- as.matrix(mtcars[, -1]) +dm <- xgb.DMatrix(x, label = y, nthread = 1) +model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5) +model_slice <- xgb.slice.Booster(model, 1, 3) +# Prediction for first three rounds +predict(model, x, predleaf = TRUE)[, 1:3] + +# The new model has only those rounds, so +# a full prediction from it is equivalent +predict(model_slice, x, predleaf = TRUE) +} diff --git a/R-package/src/init.c b/R-package/src/init.c index dd3a1aa2f..fff5d9f90 100644 --- a/R-package/src/init.c +++ b/R-package/src/init.c @@ -64,6 +64,7 @@ extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP); extern SEXP XGBSetGlobalConfig_R(SEXP); extern SEXP XGBGetGlobalConfig_R(void); extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP); +extern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { {"XGDuplicate_R", (DL_FUNC) &XGDuplicate_R, 1}, @@ -114,6 +115,7 @@ static const R_CallMethodDef CallEntries[] = { {"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1}, {"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0}, {"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2}, + {"XGBoosterSlice_R", (DL_FUNC) &XGBoosterSlice_R, 4}, {NULL, NULL, 0} }; diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index 4a8710124..1d01b9aae 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -1289,3 +1289,18 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) { return r_out; } + +XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) { + SEXP out = Rf_protect(XGBMakeEmptyAltrep()); + R_API_BEGIN(); + BoosterHandle handle_out = nullptr; + CHECK_CALL(XGBoosterSlice(R_ExternalPtrAddr(handle), + Rf_asInteger(begin_layer), + Rf_asInteger(end_layer), + Rf_asInteger(step), + &handle_out)); + XGBAltrepSetPointer(out, handle_out); + R_API_END(); + Rf_unprotect(1); + return out; +} diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h index e2688bf34..ec30dbada 100644 --- a/R-package/src/xgboost_R.h +++ b/R-package/src/xgboost_R.h @@ -402,4 +402,14 @@ XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle); */ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config); +/*! + * \brief Slice a fitted booster model (by rounds) + * \param handle handle to the fitted booster + * \param begin_layer start of the slice + * \param end_later end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round + * \param step step size of the slice + * \return The sliced booster with the requested rounds only + */ +XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step); + #endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*) diff --git a/R-package/tests/testthat/test_booster_slicing.R b/R-package/tests/testthat/test_booster_slicing.R new file mode 100644 index 000000000..711ccd8b6 --- /dev/null +++ b/R-package/tests/testthat/test_booster_slicing.R @@ -0,0 +1,67 @@ +context("testing xgb.Booster slicing") + +data(agaricus.train, package = "xgboost") +dm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1) +# Note: here need large step sizes in order for the predictions +# to have substantially different leaf assignments on each tree +model <- xgb.train( + params = list(objective = "binary:logistic", nthread = 1, max_depth = 4, eta = 0.5), + data = dm, + nrounds = 20 +) +pred <- predict(model, dm, predleaf = TRUE, reshape = TRUE) + +test_that("Slicing full model", { + new_model <- xgb.slice.Booster(model, 1, 0) + expect_equal(xgb.save.raw(new_model), xgb.save.raw(model)) + + new_model <- model[] + expect_equal(xgb.save.raw(new_model), xgb.save.raw(model)) + + new_model <- model[1:length(model)] # nolint + expect_equal(xgb.save.raw(new_model), xgb.save.raw(model)) +}) + +test_that("Slicing sequence from start", { + new_model <- xgb.slice.Booster(model, 1, 10) + new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + expect_equal(new_pred, pred[, seq(1, 10)]) + + new_model <- model[1:10] + new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + expect_equal(new_pred, pred[, seq(1, 10)]) +}) + +test_that("Slicing sequence from middle", { + new_model <- xgb.slice.Booster(model, 5, 10) + new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + expect_equal(new_pred, pred[, seq(5, 10)]) + + new_model <- model[5:10] + new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + expect_equal(new_pred, pred[, seq(5, 10)]) +}) + +test_that("Slicing with non-unit step", { + for (s in 2:5) { + new_model <- xgb.slice.Booster(model, 1, 17, s) + new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + expect_equal(new_pred, pred[, seq(1, 17, s)]) + + new_model <- model[seq(1, 17, s)] + new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + expect_equal(new_pred, pred[, seq(1, 17, s)]) + } +}) + +test_that("Slicing with non-unit step from middle", { + for (s in 2:5) { + new_model <- xgb.slice.Booster(model, 4, 17, s) + new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + expect_equal(new_pred, pred[, seq(4, 17, s)]) + + new_model <- model[seq(4, 17, s)] + new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE) + expect_equal(new_pred, pred[, seq(4, 17, s)]) + } +})