[R] rename proxy dmatrix -> data batch (#10016)
This commit is contained in:
parent
1e72dc1276
commit
0955213220
@ -32,9 +32,9 @@ export(setinfo)
|
||||
export(xgb.DMatrix)
|
||||
export(xgb.DMatrix.hasinfo)
|
||||
export(xgb.DMatrix.save)
|
||||
export(xgb.DataBatch)
|
||||
export(xgb.DataIter)
|
||||
export(xgb.ExternalDMatrix)
|
||||
export(xgb.ProxyDMatrix)
|
||||
export(xgb.QuantileDMatrix)
|
||||
export(xgb.QuantileDMatrix.from_iterator)
|
||||
export(xgb.attr)
|
||||
|
||||
@ -348,7 +348,7 @@ xgb.QuantileDMatrix <- function(
|
||||
.Call(XGDMatrixFree_R, proxy_handle)
|
||||
})
|
||||
iterator_next <- function() {
|
||||
return(xgb.ProxyDMatrix.internal(proxy_handle, data_iterator))
|
||||
return(xgb.ProxyDMatrix(proxy_handle, data_iterator))
|
||||
}
|
||||
iterator_reset <- function() {
|
||||
return(data_iterator$f_reset(iterator_env))
|
||||
@ -391,12 +391,12 @@ xgb.QuantileDMatrix <- function(
|
||||
#' to know which part of the data to pass next.
|
||||
#' @param f_next `function(env)` which is responsible for:\itemize{
|
||||
#' \item Accessing or retrieving the next batch of data in the iterator.
|
||||
#' \item Supplying this data by calling function \link{xgb.ProxyDMatrix} on it and returning the result.
|
||||
#' \item Supplying this data by calling function \link{xgb.DataBatch} on it and returning the result.
|
||||
#' \item Keeping track of where in the iterator batch it is or will go next, which can for example
|
||||
#' be done by modifying variables in the `env` variable that is passed here.
|
||||
#' \item Signaling whether there are more batches to be consumed or not, by returning `NULL`
|
||||
#' when the stream of data ends (all batches in the iterator have been consumed), or the result from
|
||||
#' calling \link{xgb.ProxyDMatrix} when there are more batches in the line to be consumed.
|
||||
#' calling \link{xgb.DataBatch} when there are more batches in the line to be consumed.
|
||||
#' }
|
||||
#' @param f_reset `function(env)` which is responsible for resetting the data iterator
|
||||
#' (i.e. taking it back to the first batch, called before and after the sequence of batches
|
||||
@ -406,7 +406,7 @@ xgb.QuantileDMatrix <- function(
|
||||
#' (and in the same order) must be passed in subsequent iterations.
|
||||
#' @return An `xgb.DataIter` object, containing the same inputs supplied here, which can then
|
||||
#' be passed to \link{xgb.ExternalDMatrix}.
|
||||
#' @seealso \link{xgb.ExternalDMatrix}, \link{xgb.ProxyDMatrix}.
|
||||
#' @seealso \link{xgb.ExternalDMatrix}, \link{xgb.DataBatch}.
|
||||
#' @export
|
||||
xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
if (!is.function(f_next)) {
|
||||
@ -434,7 +434,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
env[["iter"]] <- curr_iter + 1L
|
||||
})
|
||||
return(
|
||||
xgb.ProxyDMatrix(
|
||||
xgb.DataBatch(
|
||||
data = env[["data"]],
|
||||
label = env[["label"]],
|
||||
weight = env[["weight"]],
|
||||
@ -464,13 +464,13 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
.make.proxy.handle <- function() {
|
||||
out <- .Call(XGProxyDMatrixCreate_R)
|
||||
attributes(out) <- list(
|
||||
class = c("xgb.DMatrix", "xgb.ProxyDMatrixHandle"),
|
||||
class = c("xgb.DMatrix", "xgb.ProxyDMatrix"),
|
||||
fields = new.env()
|
||||
)
|
||||
return(out)
|
||||
}
|
||||
|
||||
#' @title Proxy DMatrix Updater
|
||||
#' @title Structure for Data Batches
|
||||
#' @description Helper function to supply data in batches of a data iterator when
|
||||
#' constructing a DMatrix from external memory through \link{xgb.ExternalDMatrix}
|
||||
#' or through \link{xgb.QuantileDMatrix.from_iterator}.
|
||||
@ -480,8 +480,8 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
#' when constructing a DMatrix through external memory - otherwise, one should call
|
||||
#' \link{xgb.DMatrix} or \link{xgb.QuantileDMatrix}.
|
||||
#'
|
||||
#' The object that results from calling this function directly is \bold{not} like the other
|
||||
#' `xgb.DMatrix` variants - i.e. cannot be used to train a model, nor to get predictions - only
|
||||
#' The object that results from calling this function directly is \bold{not} like
|
||||
#' an `xgb.DMatrix` - i.e. cannot be used to train a model, nor to get predictions - only
|
||||
#' possible usage is to supply data to an iterator, from which a DMatrix is then constructed.
|
||||
#'
|
||||
#' For more information and for example usage, see the documentation for \link{xgb.ExternalDMatrix}.
|
||||
@ -499,11 +499,11 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
#' \link{xgb.DMatrix} for details on it.
|
||||
#' \item CSR matrices, as class `dgRMatrix` from package `Matrix`.
|
||||
#' }
|
||||
#' @return An object of class `xgb.ProxyDMatrix`, which is just a list containing the
|
||||
#' @return An object of class `xgb.DataBatch`, which is just a list containing the
|
||||
#' data and parameters passed here. It does \bold{not} inherit from `xgb.DMatrix`.
|
||||
#' @seealso \link{xgb.DataIter}, \link{xgb.ExternalDMatrix}.
|
||||
#' @export
|
||||
xgb.ProxyDMatrix <- function(
|
||||
xgb.DataBatch <- function(
|
||||
data,
|
||||
label = NULL,
|
||||
weight = NULL,
|
||||
@ -530,17 +530,18 @@ xgb.ProxyDMatrix <- function(
|
||||
label_upper_bound = label_upper_bound,
|
||||
feature_weights = feature_weights
|
||||
)
|
||||
class(out) <- "xgb.ProxyDMatrix"
|
||||
class(out) <- "xgb.DataBatch"
|
||||
return(out)
|
||||
}
|
||||
|
||||
xgb.ProxyDMatrix.internal <- function(proxy_handle, data_iterator) {
|
||||
# This is only for internal usage, class is not exposed to the user.
|
||||
xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
|
||||
lst <- data_iterator$f_next(data_iterator$env)
|
||||
if (is.null(lst)) {
|
||||
return(0L)
|
||||
}
|
||||
if (!inherits(lst, "xgb.ProxyDMatrix")) {
|
||||
stop("DataIter 'f_next' must return either NULL or the result from calling 'xgb.ProxyDMatrix'.")
|
||||
if (!inherits(lst, "xgb.DataBatch")) {
|
||||
stop("DataIter 'f_next' must return either NULL or the result from calling 'xgb.DataBatch'.")
|
||||
}
|
||||
|
||||
if (!is.null(lst$group) && !is.null(lst$qid)) {
|
||||
@ -606,7 +607,7 @@ xgb.ProxyDMatrix.internal <- function(proxy_handle, data_iterator) {
|
||||
#' This should not pose any problem for `numeric` types, since they do have an inherent NaN value.
|
||||
#' @return An 'xgb.DMatrix' object, with subclass 'xgb.ExternalDMatrix', in which the data is not
|
||||
#' held internally but accessed through the iterator when needed.
|
||||
#' @seealso \link{xgb.DataIter}, \link{xgb.ProxyDMatrix}, \link{xgb.QuantileDMatrix.from_iterator}
|
||||
#' @seealso \link{xgb.DataIter}, \link{xgb.DataBatch}, \link{xgb.QuantileDMatrix.from_iterator}
|
||||
#' @examples
|
||||
#' library(xgboost)
|
||||
#' data(mtcars)
|
||||
@ -646,10 +647,10 @@ xgb.ProxyDMatrix.internal <- function(proxy_handle, data_iterator) {
|
||||
#' iterator_env[["iter"]] <- curr_iter + 1
|
||||
#' })
|
||||
#'
|
||||
#' # Function 'xgb.ProxyDMatrix' must be called manually
|
||||
#' # Function 'xgb.DataBatch' must be called manually
|
||||
#' # at each batch with all the appropriate attributes,
|
||||
#' # such as feature names and feature types.
|
||||
#' return(xgb.ProxyDMatrix(data = x_batch, label = y_batch))
|
||||
#' return(xgb.DataBatch(data = x_batch, label = y_batch))
|
||||
#' }
|
||||
#'
|
||||
#' # This moves the iterator back to its beginning
|
||||
@ -693,7 +694,7 @@ xgb.ExternalDMatrix <- function(
|
||||
.Call(XGDMatrixFree_R, proxy_handle)
|
||||
})
|
||||
iterator_next <- function() {
|
||||
return(xgb.ProxyDMatrix.internal(proxy_handle, data_iterator))
|
||||
return(xgb.ProxyDMatrix(proxy_handle, data_iterator))
|
||||
}
|
||||
iterator_reset <- function() {
|
||||
return(data_iterator$f_reset(data_iterator$env))
|
||||
@ -736,7 +737,7 @@ xgb.ExternalDMatrix <- function(
|
||||
#' @inheritParams xgb.ExternalDMatrix
|
||||
#' @inheritParams xgb.QuantileDMatrix
|
||||
#' @return An 'xgb.DMatrix' object, with subclass 'xgb.QuantileDMatrix'.
|
||||
#' @seealso \link{xgb.DataIter}, \link{xgb.ProxyDMatrix}, \link{xgb.ExternalDMatrix},
|
||||
#' @seealso \link{xgb.DataIter}, \link{xgb.DataBatch}, \link{xgb.ExternalDMatrix},
|
||||
#' \link{xgb.QuantileDMatrix}
|
||||
#' @export
|
||||
xgb.QuantileDMatrix.from_iterator <- function( # nolint
|
||||
@ -758,7 +759,7 @@ xgb.QuantileDMatrix.from_iterator <- function( # nolint
|
||||
.Call(XGDMatrixFree_R, proxy_handle)
|
||||
})
|
||||
iterator_next <- function() {
|
||||
return(xgb.ProxyDMatrix.internal(proxy_handle, data_iterator))
|
||||
return(xgb.ProxyDMatrix(proxy_handle, data_iterator))
|
||||
}
|
||||
iterator_reset <- function() {
|
||||
return(data_iterator$f_reset(data_iterator$env))
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.DMatrix.R
|
||||
\name{xgb.ProxyDMatrix}
|
||||
\alias{xgb.ProxyDMatrix}
|
||||
\title{Proxy DMatrix Updater}
|
||||
\name{xgb.DataBatch}
|
||||
\alias{xgb.DataBatch}
|
||||
\title{Structure for Data Batches}
|
||||
\usage{
|
||||
xgb.ProxyDMatrix(
|
||||
xgb.DataBatch(
|
||||
data,
|
||||
label = NULL,
|
||||
weight = NULL,
|
||||
@ -82,7 +82,7 @@ functionalities such as feature importances.}
|
||||
\item{feature_weights}{Set feature weights for column sampling.}
|
||||
}
|
||||
\value{
|
||||
An object of class \code{xgb.ProxyDMatrix}, which is just a list containing the
|
||||
An object of class \code{xgb.DataBatch}, which is just a list containing the
|
||||
data and parameters passed here. It does \bold{not} inherit from \code{xgb.DMatrix}.
|
||||
}
|
||||
\description{
|
||||
@ -95,8 +95,8 @@ is passed as argument to function \link{xgb.DataIter} to construct a data iterat
|
||||
when constructing a DMatrix through external memory - otherwise, one should call
|
||||
\link{xgb.DMatrix} or \link{xgb.QuantileDMatrix}.
|
||||
|
||||
The object that results from calling this function directly is \bold{not} like the other
|
||||
\code{xgb.DMatrix} variants - i.e. cannot be used to train a model, nor to get predictions - only
|
||||
The object that results from calling this function directly is \bold{not} like
|
||||
an \code{xgb.DMatrix} - i.e. cannot be used to train a model, nor to get predictions - only
|
||||
possible usage is to supply data to an iterator, from which a DMatrix is then constructed.
|
||||
|
||||
For more information and for example usage, see the documentation for \link{xgb.ExternalDMatrix}.
|
||||
@ -15,12 +15,12 @@ to know which part of the data to pass next.}
|
||||
|
||||
\item{f_next}{\verb{function(env)} which is responsible for:\itemize{
|
||||
\item Accessing or retrieving the next batch of data in the iterator.
|
||||
\item Supplying this data by calling function \link{xgb.ProxyDMatrix} on it and returning the result.
|
||||
\item Supplying this data by calling function \link{xgb.DataBatch} on it and returning the result.
|
||||
\item Keeping track of where in the iterator batch it is or will go next, which can for example
|
||||
be done by modifying variables in the \code{env} variable that is passed here.
|
||||
\item Signaling whether there are more batches to be consumed or not, by returning \code{NULL}
|
||||
when the stream of data ends (all batches in the iterator have been consumed), or the result from
|
||||
calling \link{xgb.ProxyDMatrix} when there are more batches in the line to be consumed.
|
||||
calling \link{xgb.DataBatch} when there are more batches in the line to be consumed.
|
||||
}}
|
||||
|
||||
\item{f_reset}{\verb{function(env)} which is responsible for resetting the data iterator
|
||||
@ -47,5 +47,5 @@ which will consume the data and create a DMatrix from it by executing the callba
|
||||
For more information, and for a usage example, see the documentation for \link{xgb.ExternalDMatrix}.
|
||||
}
|
||||
\seealso{
|
||||
\link{xgb.ExternalDMatrix}, \link{xgb.ProxyDMatrix}.
|
||||
\link{xgb.ExternalDMatrix}, \link{xgb.DataBatch}.
|
||||
}
|
||||
|
||||
@ -87,10 +87,10 @@ iterator_next <- function(iterator_env) {
|
||||
iterator_env[["iter"]] <- curr_iter + 1
|
||||
})
|
||||
|
||||
# Function 'xgb.ProxyDMatrix' must be called manually
|
||||
# Function 'xgb.DataBatch' must be called manually
|
||||
# at each batch with all the appropriate attributes,
|
||||
# such as feature names and feature types.
|
||||
return(xgb.ProxyDMatrix(data = x_batch, label = y_batch))
|
||||
return(xgb.DataBatch(data = x_batch, label = y_batch))
|
||||
}
|
||||
|
||||
# This moves the iterator back to its beginning
|
||||
@ -118,5 +118,5 @@ pred_dm <- predict(model, dm)
|
||||
pred_mat <- predict(model, as.matrix(mtcars[, -1]))
|
||||
}
|
||||
\seealso{
|
||||
\link{xgb.DataIter}, \link{xgb.ProxyDMatrix}, \link{xgb.QuantileDMatrix.from_iterator}
|
||||
\link{xgb.DataIter}, \link{xgb.DataBatch}, \link{xgb.QuantileDMatrix.from_iterator}
|
||||
}
|
||||
|
||||
@ -60,6 +60,6 @@ For more information, see the guide 'Using XGBoost External Memory Version':
|
||||
\url{https://xgboost.readthedocs.io/en/stable/tutorials/external_memory.html}
|
||||
}
|
||||
\seealso{
|
||||
\link{xgb.DataIter}, \link{xgb.ProxyDMatrix}, \link{xgb.ExternalDMatrix},
|
||||
\link{xgb.DataIter}, \link{xgb.DataBatch}, \link{xgb.ExternalDMatrix},
|
||||
\link{xgb.QuantileDMatrix}
|
||||
}
|
||||
|
||||
@ -472,7 +472,7 @@ test_that("xgb.DMatrix: ExternalDMatrix produces the same results as regular DMa
|
||||
y = mtcars[, 1]
|
||||
)
|
||||
)
|
||||
iterator_next <- function(iterator_env, proxy_handle) {
|
||||
iterator_next <- function(iterator_env) {
|
||||
curr_iter <- iterator_env[["iter"]]
|
||||
if (curr_iter >= 2) {
|
||||
return(NULL)
|
||||
@ -487,7 +487,7 @@ test_that("xgb.DMatrix: ExternalDMatrix produces the same results as regular DMa
|
||||
on.exit({
|
||||
iterator_env[["iter"]] <- curr_iter + 1
|
||||
})
|
||||
return(xgb.ProxyDMatrix(data = x_batch, label = y_batch))
|
||||
return(xgb.DataBatch(data = x_batch, label = y_batch))
|
||||
}
|
||||
iterator_reset <- function(iterator_env) {
|
||||
iterator_env[["iter"]] <- 0
|
||||
@ -546,7 +546,7 @@ test_that("xgb.DMatrix: External QDM produces same results as regular QDM", {
|
||||
y = mtcars[, 1]
|
||||
)
|
||||
)
|
||||
iterator_next <- function(iterator_env, proxy_handle) {
|
||||
iterator_next <- function(iterator_env) {
|
||||
curr_iter <- iterator_env[["iter"]]
|
||||
if (curr_iter >= 2) {
|
||||
return(NULL)
|
||||
@ -561,7 +561,7 @@ test_that("xgb.DMatrix: External QDM produces same results as regular QDM", {
|
||||
on.exit({
|
||||
iterator_env[["iter"]] <- curr_iter + 1
|
||||
})
|
||||
return(xgb.ProxyDMatrix(data = x_batch, label = y_batch))
|
||||
return(xgb.DataBatch(data = x_batch, label = y_batch))
|
||||
}
|
||||
iterator_reset <- function(iterator_env) {
|
||||
iterator_env[["iter"]] <- 0
|
||||
@ -604,7 +604,7 @@ test_that("xgb.DMatrix: R errors thrown on DataIterator are thrown back to the u
|
||||
y = mtcars[, 1]
|
||||
)
|
||||
)
|
||||
iterator_next <- function(iterator_env, proxy_handle) {
|
||||
iterator_next <- function(iterator_env) {
|
||||
curr_iter <- iterator_env[["iter"]]
|
||||
if (curr_iter >= 2) {
|
||||
return(0)
|
||||
@ -618,7 +618,7 @@ test_that("xgb.DMatrix: R errors thrown on DataIterator are thrown back to the u
|
||||
on.exit({
|
||||
iterator_env[["iter"]] <- curr_iter + 1
|
||||
})
|
||||
return(xgb.ProxyDMatrix(data = x_batch, label = y_batch))
|
||||
return(xgb.DataBatch(data = x_batch, label = y_batch))
|
||||
}
|
||||
iterator_reset <- function(iterator_env) {
|
||||
iterator_env[["iter"]] <- 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user