[R] Support booster slicing. (#9948)
This commit is contained in:
parent
c5d0608057
commit
5062a3ab46
@ -1,5 +1,6 @@
|
|||||||
# Generated by roxygen2: do not edit by hand
|
# Generated by roxygen2: do not edit by hand
|
||||||
|
|
||||||
|
S3method("[",xgb.Booster)
|
||||||
S3method("[",xgb.DMatrix)
|
S3method("[",xgb.DMatrix)
|
||||||
S3method("dimnames<-",xgb.DMatrix)
|
S3method("dimnames<-",xgb.DMatrix)
|
||||||
S3method(coef,xgb.Booster)
|
S3method(coef,xgb.Booster)
|
||||||
@ -7,6 +8,7 @@ S3method(dim,xgb.DMatrix)
|
|||||||
S3method(dimnames,xgb.DMatrix)
|
S3method(dimnames,xgb.DMatrix)
|
||||||
S3method(getinfo,xgb.Booster)
|
S3method(getinfo,xgb.Booster)
|
||||||
S3method(getinfo,xgb.DMatrix)
|
S3method(getinfo,xgb.DMatrix)
|
||||||
|
S3method(length,xgb.Booster)
|
||||||
S3method(predict,xgb.Booster)
|
S3method(predict,xgb.Booster)
|
||||||
S3method(print,xgb.Booster)
|
S3method(print,xgb.Booster)
|
||||||
S3method(print,xgb.DMatrix)
|
S3method(print,xgb.DMatrix)
|
||||||
@ -62,6 +64,7 @@ export(xgb.plot.tree)
|
|||||||
export(xgb.save)
|
export(xgb.save)
|
||||||
export(xgb.save.raw)
|
export(xgb.save.raw)
|
||||||
export(xgb.set.config)
|
export(xgb.set.config)
|
||||||
|
export(xgb.slice.Booster)
|
||||||
export(xgb.train)
|
export(xgb.train)
|
||||||
export(xgboost)
|
export(xgboost)
|
||||||
import(methods)
|
import(methods)
|
||||||
|
|||||||
@ -693,16 +693,94 @@ setinfo.xgb.Booster <- function(object, name, info) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#' @title Get number of boosting in a fitted booster
|
#' @title Get number of boosting in a fitted booster
|
||||||
#' @param model A fitted `xgb.Booster` model.
|
#' @param model,x A fitted `xgb.Booster` model.
|
||||||
#' @return The number of rounds saved in the model, as an integer.
|
#' @return The number of rounds saved in the model, as an integer.
|
||||||
#' @details Note that setting booster parameters related to training
|
#' @details Note that setting booster parameters related to training
|
||||||
#' continuation / updates through \link{xgb.parameters<-} will reset the
|
#' continuation / updates through \link{xgb.parameters<-} will reset the
|
||||||
#' number of rounds to zero.
|
#' number of rounds to zero.
|
||||||
#' @export
|
#' @export
|
||||||
|
#' @rdname xgb.get.num.boosted.rounds
|
||||||
xgb.get.num.boosted.rounds <- function(model) {
|
xgb.get.num.boosted.rounds <- function(model) {
|
||||||
return(.Call(XGBoosterBoostedRounds_R, xgb.get.handle(model)))
|
return(.Call(XGBoosterBoostedRounds_R, xgb.get.handle(model)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' @rdname xgb.get.num.boosted.rounds
|
||||||
|
#' @export
|
||||||
|
length.xgb.Booster <- function(x) {
|
||||||
|
return(xgb.get.num.boosted.rounds(x))
|
||||||
|
}
|
||||||
|
|
||||||
|
#' @title Slice Booster by Rounds
|
||||||
|
#' @description Creates a new booster including only a selected range of rounds / iterations
|
||||||
|
#' from an existing booster, as given by the sequence `seq(start, end, step)`.
|
||||||
|
#' @details Note that any R attributes that the booster might have, will not be copied into
|
||||||
|
#' the resulting object.
|
||||||
|
#' @param model,x A fitted `xgb.Booster` object, which is to be sliced by taking only a subset
|
||||||
|
#' of its rounds / iterations.
|
||||||
|
#' @param start Start of the slice (base-1 and inclusive, like R's \link{seq}).
|
||||||
|
#' @param end End of the slice (base-1 and inclusive, like R's \link{seq}).
|
||||||
|
#'
|
||||||
|
#' Passing a value of zero here is equivalent to passing the full number of rounds in the
|
||||||
|
#' booster object.
|
||||||
|
#' @param step Step size of the slice. Passing '1' will take every round in the sequence defined by
|
||||||
|
#' `(start, end)`, while passing '2' will take every second value, and so on.
|
||||||
|
#' @return A sliced booster object containing only the requested rounds.
|
||||||
|
#' @examples
|
||||||
|
#' data(mtcars)
|
||||||
|
#' y <- mtcars$mpg
|
||||||
|
#' x <- as.matrix(mtcars[, -1])
|
||||||
|
#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||||
|
#' model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
|
||||||
|
#' model_slice <- xgb.slice.Booster(model, 1, 3)
|
||||||
|
#' # Prediction for first three rounds
|
||||||
|
#' predict(model, x, predleaf = TRUE)[, 1:3]
|
||||||
|
#'
|
||||||
|
#' # The new model has only those rounds, so
|
||||||
|
#' # a full prediction from it is equivalent
|
||||||
|
#' predict(model_slice, x, predleaf = TRUE)
|
||||||
|
#' @export
|
||||||
|
#' @rdname xgb.slice.Booster
|
||||||
|
xgb.slice.Booster <- function(model, start, end = xgb.get.num.boosted.rounds(model), step = 1L) {
|
||||||
|
# This makes the slice mimic the behavior of R's 'seq',
|
||||||
|
# which truncates on the end of the slice when the step
|
||||||
|
# doesn't reach it.
|
||||||
|
if (end > start && step > 1) {
|
||||||
|
d <- (end - start + 1) / step
|
||||||
|
if (d != floor(d)) {
|
||||||
|
end <- start + step * ceiling(d) - 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(
|
||||||
|
.Call(
|
||||||
|
XGBoosterSlice_R,
|
||||||
|
xgb.get.handle(model),
|
||||||
|
start - 1,
|
||||||
|
end,
|
||||||
|
step
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#' @export
|
||||||
|
#' @rdname xgb.slice.Booster
|
||||||
|
#' @param i The indices - must be an increasing sequence as generated by e.g. `seq(...)`.
|
||||||
|
`[.xgb.Booster` <- function(x, i) {
|
||||||
|
if (missing(i)) {
|
||||||
|
return(xgb.slice.Booster(x, 1, 0))
|
||||||
|
}
|
||||||
|
if (length(i) == 1) {
|
||||||
|
return(xgb.slice.Booster(x, i, i))
|
||||||
|
}
|
||||||
|
steps <- diff(i)
|
||||||
|
if (any(steps < 0)) {
|
||||||
|
stop("Can only slice booster with ascending sequences.")
|
||||||
|
}
|
||||||
|
if (length(unique(steps)) > 1) {
|
||||||
|
stop("Can only slice booster with fixed-step sequences.")
|
||||||
|
}
|
||||||
|
return(xgb.slice.Booster(x, i[1L], i[length(i)], steps[1L]))
|
||||||
|
}
|
||||||
|
|
||||||
#' @title Get Features Names from Booster
|
#' @title Get Features Names from Booster
|
||||||
#' @description Returns the feature / variable / column names from a fitted
|
#' @description Returns the feature / variable / column names from a fitted
|
||||||
#' booster object, which are set automatically during the call to \link{xgb.train}
|
#' booster object, which are set automatically during the call to \link{xgb.train}
|
||||||
|
|||||||
@ -2,12 +2,15 @@
|
|||||||
% Please edit documentation in R/xgb.Booster.R
|
% Please edit documentation in R/xgb.Booster.R
|
||||||
\name{xgb.get.num.boosted.rounds}
|
\name{xgb.get.num.boosted.rounds}
|
||||||
\alias{xgb.get.num.boosted.rounds}
|
\alias{xgb.get.num.boosted.rounds}
|
||||||
|
\alias{length.xgb.Booster}
|
||||||
\title{Get number of boosting in a fitted booster}
|
\title{Get number of boosting in a fitted booster}
|
||||||
\usage{
|
\usage{
|
||||||
xgb.get.num.boosted.rounds(model)
|
xgb.get.num.boosted.rounds(model)
|
||||||
|
|
||||||
|
\method{length}{xgb.Booster}(x)
|
||||||
}
|
}
|
||||||
\arguments{
|
\arguments{
|
||||||
\item{model}{A fitted \code{xgb.Booster} model.}
|
\item{model, x}{A fitted \code{xgb.Booster} model.}
|
||||||
}
|
}
|
||||||
\value{
|
\value{
|
||||||
The number of rounds saved in the model, as an integer.
|
The number of rounds saved in the model, as an integer.
|
||||||
|
|||||||
57
R-package/man/xgb.slice.Booster.Rd
Normal file
57
R-package/man/xgb.slice.Booster.Rd
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
% Generated by roxygen2: do not edit by hand
|
||||||
|
% Please edit documentation in R/xgb.Booster.R
|
||||||
|
\name{xgb.slice.Booster}
|
||||||
|
\alias{xgb.slice.Booster}
|
||||||
|
\alias{[.xgb.Booster}
|
||||||
|
\title{Slice Booster by Rounds}
|
||||||
|
\usage{
|
||||||
|
xgb.slice.Booster(
|
||||||
|
model,
|
||||||
|
start,
|
||||||
|
end = xgb.get.num.boosted.rounds(model),
|
||||||
|
step = 1L
|
||||||
|
)
|
||||||
|
|
||||||
|
\method{[}{xgb.Booster}(x, i)
|
||||||
|
}
|
||||||
|
\arguments{
|
||||||
|
\item{model, x}{A fitted \code{xgb.Booster} object, which is to be sliced by taking only a subset
|
||||||
|
of its rounds / iterations.}
|
||||||
|
|
||||||
|
\item{start}{Start of the slice (base-1 and inclusive, like R's \link{seq}).}
|
||||||
|
|
||||||
|
\item{end}{End of the slice (base-1 and inclusive, like R's \link{seq}).
|
||||||
|
|
||||||
|
Passing a value of zero here is equivalent to passing the full number of rounds in the
|
||||||
|
booster object.}
|
||||||
|
|
||||||
|
\item{step}{Step size of the slice. Passing '1' will take every round in the sequence defined by
|
||||||
|
\verb{(start, end)}, while passing '2' will take every second value, and so on.}
|
||||||
|
|
||||||
|
\item{i}{The indices - must be an increasing sequence as generated by e.g. \code{seq(...)}.}
|
||||||
|
}
|
||||||
|
\value{
|
||||||
|
A sliced booster object containing only the requested rounds.
|
||||||
|
}
|
||||||
|
\description{
|
||||||
|
Creates a new booster including only a selected range of rounds / iterations
|
||||||
|
from an existing booster, as given by the sequence \code{seq(start, end, step)}.
|
||||||
|
}
|
||||||
|
\details{
|
||||||
|
Note that any R attributes that the booster might have, will not be copied into
|
||||||
|
the resulting object.
|
||||||
|
}
|
||||||
|
\examples{
|
||||||
|
data(mtcars)
|
||||||
|
y <- mtcars$mpg
|
||||||
|
x <- as.matrix(mtcars[, -1])
|
||||||
|
dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||||
|
model <- xgb.train(data = dm, params = list(nthread = 1), nrounds = 5)
|
||||||
|
model_slice <- xgb.slice.Booster(model, 1, 3)
|
||||||
|
# Prediction for first three rounds
|
||||||
|
predict(model, x, predleaf = TRUE)[, 1:3]
|
||||||
|
|
||||||
|
# The new model has only those rounds, so
|
||||||
|
# a full prediction from it is equivalent
|
||||||
|
predict(model_slice, x, predleaf = TRUE)
|
||||||
|
}
|
||||||
@ -64,6 +64,7 @@ extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP);
|
|||||||
extern SEXP XGBSetGlobalConfig_R(SEXP);
|
extern SEXP XGBSetGlobalConfig_R(SEXP);
|
||||||
extern SEXP XGBGetGlobalConfig_R(void);
|
extern SEXP XGBGetGlobalConfig_R(void);
|
||||||
extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);
|
extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);
|
||||||
|
extern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP);
|
||||||
|
|
||||||
static const R_CallMethodDef CallEntries[] = {
|
static const R_CallMethodDef CallEntries[] = {
|
||||||
{"XGDuplicate_R", (DL_FUNC) &XGDuplicate_R, 1},
|
{"XGDuplicate_R", (DL_FUNC) &XGDuplicate_R, 1},
|
||||||
@ -114,6 +115,7 @@ static const R_CallMethodDef CallEntries[] = {
|
|||||||
{"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1},
|
{"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1},
|
||||||
{"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0},
|
{"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0},
|
||||||
{"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2},
|
{"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2},
|
||||||
|
{"XGBoosterSlice_R", (DL_FUNC) &XGBoosterSlice_R, 4},
|
||||||
{NULL, NULL, 0}
|
{NULL, NULL, 0}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -1289,3 +1289,18 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config) {
|
|||||||
|
|
||||||
return r_out;
|
return r_out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) {
|
||||||
|
SEXP out = Rf_protect(XGBMakeEmptyAltrep());
|
||||||
|
R_API_BEGIN();
|
||||||
|
BoosterHandle handle_out = nullptr;
|
||||||
|
CHECK_CALL(XGBoosterSlice(R_ExternalPtrAddr(handle),
|
||||||
|
Rf_asInteger(begin_layer),
|
||||||
|
Rf_asInteger(end_layer),
|
||||||
|
Rf_asInteger(step),
|
||||||
|
&handle_out));
|
||||||
|
XGBAltrepSetPointer(out, handle_out);
|
||||||
|
R_API_END();
|
||||||
|
Rf_unprotect(1);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|||||||
@ -402,4 +402,14 @@ XGB_DLL SEXP XGBoosterGetAttrNames_R(SEXP handle);
|
|||||||
*/
|
*/
|
||||||
XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
|
XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Slice a fitted booster model (by rounds)
|
||||||
|
* \param handle handle to the fitted booster
|
||||||
|
* \param begin_layer start of the slice
|
||||||
|
* \param end_later end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round
|
||||||
|
* \param step step size of the slice
|
||||||
|
* \return The sliced booster with the requested rounds only
|
||||||
|
*/
|
||||||
|
XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
|
||||||
|
|
||||||
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)
|
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)
|
||||||
|
|||||||
67
R-package/tests/testthat/test_booster_slicing.R
Normal file
67
R-package/tests/testthat/test_booster_slicing.R
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
context("testing xgb.Booster slicing")
|
||||||
|
|
||||||
|
data(agaricus.train, package = "xgboost")
|
||||||
|
dm <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label, nthread = 1)
|
||||||
|
# Note: here need large step sizes in order for the predictions
|
||||||
|
# to have substantially different leaf assignments on each tree
|
||||||
|
model <- xgb.train(
|
||||||
|
params = list(objective = "binary:logistic", nthread = 1, max_depth = 4, eta = 0.5),
|
||||||
|
data = dm,
|
||||||
|
nrounds = 20
|
||||||
|
)
|
||||||
|
pred <- predict(model, dm, predleaf = TRUE, reshape = TRUE)
|
||||||
|
|
||||||
|
test_that("Slicing full model", {
|
||||||
|
new_model <- xgb.slice.Booster(model, 1, 0)
|
||||||
|
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
|
||||||
|
|
||||||
|
new_model <- model[]
|
||||||
|
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
|
||||||
|
|
||||||
|
new_model <- model[1:length(model)] # nolint
|
||||||
|
expect_equal(xgb.save.raw(new_model), xgb.save.raw(model))
|
||||||
|
})
|
||||||
|
|
||||||
|
test_that("Slicing sequence from start", {
|
||||||
|
new_model <- xgb.slice.Booster(model, 1, 10)
|
||||||
|
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||||
|
expect_equal(new_pred, pred[, seq(1, 10)])
|
||||||
|
|
||||||
|
new_model <- model[1:10]
|
||||||
|
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||||
|
expect_equal(new_pred, pred[, seq(1, 10)])
|
||||||
|
})
|
||||||
|
|
||||||
|
test_that("Slicing sequence from middle", {
|
||||||
|
new_model <- xgb.slice.Booster(model, 5, 10)
|
||||||
|
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||||
|
expect_equal(new_pred, pred[, seq(5, 10)])
|
||||||
|
|
||||||
|
new_model <- model[5:10]
|
||||||
|
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||||
|
expect_equal(new_pred, pred[, seq(5, 10)])
|
||||||
|
})
|
||||||
|
|
||||||
|
test_that("Slicing with non-unit step", {
|
||||||
|
for (s in 2:5) {
|
||||||
|
new_model <- xgb.slice.Booster(model, 1, 17, s)
|
||||||
|
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||||
|
expect_equal(new_pred, pred[, seq(1, 17, s)])
|
||||||
|
|
||||||
|
new_model <- model[seq(1, 17, s)]
|
||||||
|
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||||
|
expect_equal(new_pred, pred[, seq(1, 17, s)])
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
test_that("Slicing with non-unit step from middle", {
|
||||||
|
for (s in 2:5) {
|
||||||
|
new_model <- xgb.slice.Booster(model, 4, 17, s)
|
||||||
|
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||||
|
expect_equal(new_pred, pred[, seq(4, 17, s)])
|
||||||
|
|
||||||
|
new_model <- model[seq(4, 17, s)]
|
||||||
|
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||||
|
expect_equal(new_pred, pred[, seq(4, 17, s)])
|
||||||
|
}
|
||||||
|
})
|
||||||
Loading…
x
Reference in New Issue
Block a user