[R] Support booster slicing. (#9948)

This commit is contained in:
david-cortes 2024-01-20 22:11:26 +01:00 committed by GitHub
parent c5d0608057
commit 5062a3ab46
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 237 additions and 2 deletions

View File

@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand # Generated by roxygen2: do not edit by hand
S3method("[",xgb.Booster)
S3method("[",xgb.DMatrix) S3method("[",xgb.DMatrix)
S3method("dimnames<-",xgb.DMatrix) S3method("dimnames<-",xgb.DMatrix)
S3method(coef,xgb.Booster) S3method(coef,xgb.Booster)
@ -7,6 +8,7 @@ S3method(dim,xgb.DMatrix)
S3method(dimnames,xgb.DMatrix) S3method(dimnames,xgb.DMatrix)
S3method(getinfo,xgb.Booster) S3method(getinfo,xgb.Booster)
S3method(getinfo,xgb.DMatrix) S3method(getinfo,xgb.DMatrix)
S3method(length,xgb.Booster)
S3method(predict,xgb.Booster) S3method(predict,xgb.Booster)
S3method(print,xgb.Booster) S3method(print,xgb.Booster)
S3method(print,xgb.DMatrix) S3method(print,xgb.DMatrix)
@ -62,6 +64,7 @@ export(xgb.plot.tree)
export(xgb.save) export(xgb.save)
export(xgb.save.raw) export(xgb.save.raw)
export(xgb.set.config) export(xgb.set.config)
export(xgb.slice.Booster)
export(xgb.train) export(xgb.train)
export(xgboost) export(xgboost)
import(methods) import(methods)

View File

@ -693,16 +693,94 @@ setinfo.xgb.Booster <- function(object, name, info) {
} }
#' @title Get number of boosting in a fitted booster #' @title Get number of boosting in a fitted booster
#' @param model A fitted `xgb.Booster` model. #' @param model,x A fitted `xgb.Booster` model.
#' @return The number of rounds saved in the model, as an integer. #' @return The number of rounds saved in the model, as an integer.
#' @details Note that setting booster parameters related to training #' @details Note that setting booster parameters related to training
#' continuation / updates through \link{xgb.parameters<-} will reset the #' continuation / updates through \link{xgb.parameters<-} will reset the
#' number of rounds to zero. #' number of rounds to zero.
#' @export #' @export
#' @rdname xgb.get.num.boosted.rounds
xgb.get.num.boosted.rounds <- function(model) { xgb.get.num.boosted.rounds <- function(model) {
return(.Call(XGBoosterBoostedRounds_R, xgb.get.handle(model))) return(.Call(XGBoosterBoostedRounds_R, xgb.get.handle(model)))
} }
#' @rdname xgb.get.num.boosted.rounds
#' @export
length.xgb.Booster <- function(x) {
return(xgb.get.num.boosted.rounds(x))
}
#' @title Slice Booster by Rounds
#' @description Creates a new booster including only a selected range of rounds / iterations
#' from an existing booster, as given by the sequence `seq(start, end, step)`.
#' @details Note that any R attributes that the booster might have, will not be copied into
#' the resulting object.
#' @param model,x A fitted `xgb.Booster` object, which is to be sliced by taking only a subset
#' of its rounds / iterations.
#' @param start Start of the slice (base-1 and inclusive, like R's \link{seq}).
#' @param end End of the slice (base-1 and inclusive, like R's \link{seq}).
#'
#' Passing a value of zero here is equivalent to passing the full number of rounds in the
#' booster object.
#' @param step Step size of the slice. Passing '1' will take every round in the sequence defined by
#' `(start, end)`, while passing '2' will take every second value, and so on.
#' @return A sliced booster object containing only the requested rounds.
#' @examples
#' data(mtcars)
#' y <- mtcars$mpg
#' x <- as.matrix(mtcars[, -1])
#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
#' model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
#' model_slice <- xgb.slice.Booster(model, 1, 3)
#' # Prediction for first three rounds
#' predict(model, x, predleaf = TRUE)[, 1:3]
#'
#' # The new model has only those rounds, so
#' # a full prediction from it is equivalent
#' predict(model_slice, x, predleaf = TRUE)
#' @export
#' @rdname xgb.slice.Booster
xgb.slice.Booster <- function(model, start, end = xgb.get.num.boosted.rounds(model), step = 1L) {
# This makes the slice mimic the behavior of R's 'seq',
# which truncates on the end of the slice when the step
# doesn't reach it.
if (end > start && step > 1) {
d <- (end - start + 1) / step
if (d != floor(d)) {
end <- start + step * ceiling(d) - 1
}
}
return(
.Call(
XGBoosterSlice_R,
xgb.get.handle(model),
start - 1,
end,
step
)
)
}
#' @export
#' @rdname xgb.slice.Booster
#' @param i The indices - must be an increasing sequence as generated by e.g. `seq(...)`.
`[.xgb.Booster` <- function(x, i) {
if (missing(i)) {
return(xgb.slice.Booster(x, 1, 0))
}
if (length(i) == 1) {
return(xgb.slice.Booster(x, i, i))
}
steps <- diff(i)
if (any(steps < 0)) {
stop("Can only slice booster with ascending sequences.")
}
if (length(unique(steps)) > 1) {
stop("Can only slice booster with fixed-step sequences.")
}
return(xgb.slice.Booster(x, i[1L], i[length(i)], steps[1L]))
}
#' @title Get Features Names from Booster #' @title Get Features Names from Booster
#' @description Returns the feature / variable / column names from a fitted #' @description Returns the feature / variable / column names from a fitted
#' booster object, which are set automatically during the call to \link{xgb.train} #' booster object, which are set automatically during the call to \link{xgb.train}

View File

@ -2,12 +2,15 @@
% Please edit documentation in R/xgb.Booster.R % Please edit documentation in R/xgb.Booster.R
\name{xgb.get.num.boosted.rounds} \name{xgb.get.num.boosted.rounds}
\alias{xgb.get.num.boosted.rounds} \alias{xgb.get.num.boosted.rounds}
\alias{length.xgb.Booster}
\title{Get number of boosting in a fitted booster} \title{Get number of boosting in a fitted booster}
\usage{ \usage{
xgb.get.num.boosted.rounds(model) xgb.get.num.boosted.rounds(model)
\method{length}{xgb.Booster}(x)
} }
\arguments{ \arguments{
\item{model}{A fitted \code{xgb.Booster} model.} \item{model, x}{A fitted \code{xgb.Booster} model.}
} }
\value{ \value{
The number of rounds saved in the model, as an integer. The number of rounds saved in the model, as an integer.

View File

@ -0,0 +1,57 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xgb.Booster.R
\name{xgb.slice.Booster}
\alias{xgb.slice.Booster}
\alias{[.xgb.Booster}
\title{Slice Booster by Rounds}
\usage{
xgb.slice.Booster(
model,
start,
end = xgb.get.num.boosted.rounds(model),
step = 1L
)
\method{[}{xgb.Booster}(x, i)
}
\arguments{
\item{model, x}{A fitted \code{xgb.Booster} object, which is to be sliced by taking only a subset
of its rounds / iterations.}
\item{start}{Start of the slice (base-1 and inclusive, like R's \link{seq}).}
\item{end}{End of the slice (base-1 and inclusive, like R's \link{seq}).
Passing a value of zero here is equivalent to passing the full number of rounds in the
booster object.}
\item{step}{Step size of the slice. Passing '1' will take every round in the sequence defined by
\verb{(start, end)}, while passing '2' will take every second value, and so on.}
\item{i}{The indices - must be an increasing sequence as generated by e.g. \code{seq(...)}.}
}
\value{
A sliced booster object containing only the requested rounds.
}
\description{
Creates a new booster including only a selected range of rounds / iterations
from an existing booster, as given by the sequence \code{seq(start, end, step)}.
}
\details{
Note that any R attributes that the booster might have, will not be copied into
the resulting object.
}
\examples{
data(mtcars)
y <- mtcars$mpg
x <- as.matrix(mtcars[, -1])
dm <- xgb.DMatrix(x, label = y, nthread = 1)
model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
model_slice <- xgb.slice.Booster(model, 1, 3)
# Prediction for first three rounds
predict(model, x, predleaf = TRUE)[, 1:3]
# The new model has only those rounds, so
# a full prediction from it is equivalent
predict(model_slice, x, predleaf = TRUE)
}

View File

@ -64,6 +64,7 @@ extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP);
extern SEXP XGBSetGlobalConfig_R(SEXP); extern SEXP XGBSetGlobalConfig_R(SEXP);
extern SEXP XGBGetGlobalConfig_R(void); extern SEXP XGBGetGlobalConfig_R(void);
extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP); extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);
extern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP);
static const R_CallMethodDef CallEntries[] = { static const R_CallMethodDef CallEntries[] = {
{"XGDuplicate_R", (DL_FUNC) &XGDuplicate_R, 1}, {"XGDuplicate_R", (DL_FUNC) &XGDuplicate_R, 1},
@ -114,6 +115,7 @@ static const R_CallMethodDef CallEntries[] = {
{"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1}, {"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1},
{"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0}, {"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0},
{"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2}, {"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2},
{"XGBoosterSlice_R", (DL_FUNC) &XGBoosterSlice_R, 4},
{NULL, NULL, 0} {NULL, NULL, 0}
}; };

View File

@ -1289,3 +1289,18 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {
return r_out; return r_out;
} }
XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) {
SEXP out = Rf_protect(XGBMakeEmptyAltrep());
R_API_BEGIN();
BoosterHandle handle_out = nullptr;
CHECK_CALL(XGBoosterSlice(R_ExternalPtrAddr(handle),
Rf_asInteger(begin_layer),
Rf_asInteger(end_layer),
Rf_asInteger(step),
&handle_out));
XGBAltrepSetPointer(out, handle_out);
R_API_END();
Rf_unprotect(1);
return out;
}

View File

@ -402,4 +402,14 @@ XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle);
*/ */
XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config); XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
/*!
* \brief Slice a fitted booster model (by rounds)
* \param handle handle to the fitted booster
* \param begin_layer start of the slice
* \param end_later end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round
* \param step step size of the slice
* \return The sliced booster with the requested rounds only
*/
XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*) #endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)

View File

@ -0,0 +1,67 @@
context("testing xgb.Booster slicing")
data(agaricus.train, package = "xgboost")
dm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
# Note: here need large step sizes in order for the predictions
# to have substantially different leaf assignments on each tree
model <- xgb.train(
params = list(objective = "binary:logistic", nthread = 1, max_depth = 4, eta = 0.5),
data = dm,
nrounds = 20
)
pred <- predict(model, dm, predleaf = TRUE, reshape = TRUE)
test_that("Slicing full model", {
new_model <- xgb.slice.Booster(model, 1, 0)
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
new_model <- model[]
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
new_model <- model[1:length(model)] # nolint
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
})
test_that("Slicing sequence from start", {
new_model <- xgb.slice.Booster(model, 1, 10)
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(1, 10)])
new_model <- model[1:10]
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(1, 10)])
})
test_that("Slicing sequence from middle", {
new_model <- xgb.slice.Booster(model, 5, 10)
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(5, 10)])
new_model <- model[5:10]
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(5, 10)])
})
test_that("Slicing with non-unit step", {
for (s in 2:5) {
new_model <- xgb.slice.Booster(model, 1, 17, s)
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(1, 17, s)])
new_model <- model[seq(1, 17, s)]
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(1, 17, s)])
}
})
test_that("Slicing with non-unit step from middle", {
for (s in 2:5) {
new_model <- xgb.slice.Booster(model, 4, 17, s)
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(4, 17, s)])
new_model <- model[seq(4, 17, s)]
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
expect_equal(new_pred, pred[, seq(4, 17, s)])
}
})