[R] Rename ExternalDMatrix -> ExtMemDMatrix. (#10849)
This commit is contained in:
parent
9ee4008654
commit
c9f89c4241
@ -29,7 +29,7 @@ export(xgb.DMatrix.hasinfo)
|
||||
export(xgb.DMatrix.save)
|
||||
export(xgb.DataBatch)
|
||||
export(xgb.DataIter)
|
||||
export(xgb.ExternalDMatrix)
|
||||
export(xgb.ExtMemDMatrix)
|
||||
export(xgb.QuantileDMatrix)
|
||||
export(xgb.QuantileDMatrix.from_iterator)
|
||||
export(xgb.attr)
|
||||
|
||||
@ -418,10 +418,10 @@ xgb.QuantileDMatrix <- function(
|
||||
#' This function is responsible for generating an R object structure containing callback
|
||||
#' functions and an environment shared with them.
|
||||
#'
|
||||
#' The output structure from this function is then meant to be passed to [xgb.ExternalDMatrix()],
|
||||
#' The output structure from this function is then meant to be passed to [xgb.ExtMemDMatrix()],
|
||||
#' which will consume the data and create a DMatrix from it by executing the callback functions.
|
||||
#'
|
||||
#' For more information, and for a usage example, see the documentation for [xgb.ExternalDMatrix()].
|
||||
#' For more information, and for a usage example, see the documentation for [xgb.ExtMemDMatrix()].
|
||||
#'
|
||||
#' @param env An R environment to pass to the callback functions supplied here, which can be
|
||||
#' used to keep track of variables to determine how to handle the batches.
|
||||
@ -443,8 +443,8 @@ xgb.QuantileDMatrix <- function(
|
||||
#' Note that, after resetting the iterator, the batches will be accessed again, so the same data
|
||||
#' (and in the same order) must be passed in subsequent iterations.
|
||||
#' @return An `xgb.DataIter` object, containing the same inputs supplied here, which can then
|
||||
#' be passed to [xgb.ExternalDMatrix()].
|
||||
#' @seealso [xgb.ExternalDMatrix()], [xgb.DataBatch()].
|
||||
#' be passed to [xgb.ExtMemDMatrix()].
|
||||
#' @seealso [xgb.ExtMemDMatrix()], [xgb.DataBatch()].
|
||||
#' @export
|
||||
xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
if (!is.function(f_next)) {
|
||||
@ -512,7 +512,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
#'
|
||||
#' @description
|
||||
#' Helper function to supply data in batches of a data iterator when
|
||||
#' constructing a DMatrix from external memory through [xgb.ExternalDMatrix()]
|
||||
#' constructing a DMatrix from external memory through [xgb.ExtMemDMatrix()]
|
||||
#' or through [xgb.QuantileDMatrix.from_iterator()].
|
||||
#'
|
||||
#' This function is **only** meant to be called inside of a callback function (which
|
||||
@ -524,7 +524,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
#' an `xgb.DMatrix` - i.e. cannot be used to train a model, nor to get predictions - only
|
||||
#' possible usage is to supply data to an iterator, from which a DMatrix is then constructed.
|
||||
#'
|
||||
#' For more information and for example usage, see the documentation for [xgb.ExternalDMatrix()].
|
||||
#' For more information and for example usage, see the documentation for [xgb.ExtMemDMatrix()].
|
||||
#' @inheritParams xgb.DMatrix
|
||||
#' @param data Batch of data belonging to this batch.
|
||||
#'
|
||||
@ -532,7 +532,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
#' to pass here. Supported types are:
|
||||
#' - `matrix`, with types `numeric`, `integer`, and `logical`. Note that for types
|
||||
#' `integer` and `logical`, missing values might not be automatically recognized
|
||||
#' as such - see the documentation for parameter `missing` in [xgb.ExternalDMatrix()]
|
||||
#' as such - see the documentation for parameter `missing` in [xgb.ExtMemDMatrix()]
|
||||
#' for details on this.
|
||||
#' - `data.frame`, with the same types as supported by 'xgb.DMatrix' and same
|
||||
#' conversions applied to it. See the documentation for parameter `data` in
|
||||
@ -540,7 +540,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
#' - CSR matrices, as class `dgRMatrix` from package "Matrix".
|
||||
#' @return An object of class `xgb.DataBatch`, which is just a list containing the
|
||||
#' data and parameters passed here. It does **not** inherit from `xgb.DMatrix`.
|
||||
#' @seealso [xgb.DataIter()], [xgb.ExternalDMatrix()].
|
||||
#' @seealso [xgb.DataIter()], [xgb.ExtMemDMatrix()].
|
||||
#' @export
|
||||
xgb.DataBatch <- function(
|
||||
data,
|
||||
@ -643,10 +643,10 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
|
||||
#'
|
||||
#' For example, in R `integer` types, missing values are represented by integer number `-2147483648`
|
||||
#' (since machine 'integer' types do not have an inherent 'NA' value) - hence, if one passes `NA`,
|
||||
#' which is interpreted as a floating-point NaN by [xgb.ExternalDMatrix()] and by
|
||||
#' which is interpreted as a floating-point NaN by [xgb.ExtMemDMatrix()] and by
|
||||
#' [xgb.QuantileDMatrix.from_iterator()], these integer missing values will not be treated as missing.
|
||||
#' This should not pose any problem for `numeric` types, since they do have an inherent NaN value.
|
||||
#' @return An 'xgb.DMatrix' object, with subclass 'xgb.ExternalDMatrix', in which the data is not
|
||||
#' @return An 'xgb.DMatrix' object, with subclass 'xgb.ExtMemDMatrix', in which the data is not
|
||||
#' held internally but accessed through the iterator when needed.
|
||||
#' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.QuantileDMatrix.from_iterator()]
|
||||
#' @examples
|
||||
@ -706,7 +706,7 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
|
||||
#' cache_prefix <- tempdir()
|
||||
#'
|
||||
#' # DMatrix will be constructed from the iterator's batches
|
||||
#' dm <- xgb.ExternalDMatrix(data_iterator, cache_prefix, nthread = 1)
|
||||
#' dm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1)
|
||||
#'
|
||||
#' # After construction, can be used as a regular DMatrix
|
||||
#' params <- list(nthread = 1, objective = "reg:squarederror")
|
||||
@ -717,7 +717,7 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) {
|
||||
#' pred_dm <- predict(model, dm)
|
||||
#' pred_mat <- predict(model, as.matrix(mtcars[, -1]))
|
||||
#' @export
|
||||
xgb.ExternalDMatrix <- function(
|
||||
xgb.ExtMemDMatrix <- function(
|
||||
data_iterator,
|
||||
cache_prefix = tempdir(),
|
||||
missing = NA,
|
||||
@ -753,7 +753,7 @@ xgb.ExternalDMatrix <- function(
|
||||
)
|
||||
|
||||
attributes(dmat) <- list(
|
||||
class = c("xgb.DMatrix", "xgb.ExternalDMatrix"),
|
||||
class = c("xgb.DMatrix", "xgb.ExtMemDMatrix"),
|
||||
fields = attributes(proxy_handle)$fields
|
||||
)
|
||||
return(dmat)
|
||||
@ -766,7 +766,7 @@ xgb.ExternalDMatrix <- function(
|
||||
#' Create an `xgb.QuantileDMatrix` object (exact same class as would be returned by
|
||||
#' calling function [xgb.QuantileDMatrix()], with the same advantages and limitations) from
|
||||
#' external data supplied by [xgb.DataIter()], potentially passed in batches from
|
||||
#' a bigger set that might not fit entirely in memory, same way as [xgb.ExternalDMatrix()].
|
||||
#' a bigger set that might not fit entirely in memory, same way as [xgb.ExtMemDMatrix()].
|
||||
#'
|
||||
#' Note that, while external data will only be loaded through the iterator (thus the full data
|
||||
#' might not be held entirely in-memory), the quantized representation of the data will get
|
||||
@ -776,10 +776,10 @@ xgb.ExternalDMatrix <- function(
|
||||
#'
|
||||
#' For more information, see the guide 'Using XGBoost External Memory Version':
|
||||
#' \url{https://xgboost.readthedocs.io/en/stable/tutorials/external_memory.html}
|
||||
#' @inheritParams xgb.ExternalDMatrix
|
||||
#' @inheritParams xgb.ExtMemDMatrix
|
||||
#' @inheritParams xgb.QuantileDMatrix
|
||||
#' @return An 'xgb.DMatrix' object, with subclass 'xgb.QuantileDMatrix'.
|
||||
#' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.ExternalDMatrix()],
|
||||
#' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.ExtMemDMatrix()],
|
||||
#' [xgb.QuantileDMatrix()]
|
||||
#' @export
|
||||
xgb.QuantileDMatrix.from_iterator <- function( # nolint
|
||||
@ -1318,8 +1318,8 @@ print.xgb.DMatrix <- function(x, verbose = FALSE, ...) {
|
||||
}
|
||||
class_print <- if (inherits(x, "xgb.QuantileDMatrix")) {
|
||||
"xgb.QuantileDMatrix"
|
||||
} else if (inherits(x, "xgb.ExternalDMatrix")) {
|
||||
"xgb.ExternalDMatrix"
|
||||
} else if (inherits(x, "xgb.ExtMemDMatrix")) {
|
||||
"xgb.ExtMemDMatrix"
|
||||
} else if (inherits(x, "xgb.ProxyDMatrix")) {
|
||||
"xgb.ProxyDMatrix"
|
||||
} else {
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
#' for model training by the objective.
|
||||
#'
|
||||
#' Note that only the basic `xgb.DMatrix` class is supported - variants such as `xgb.QuantileDMatrix`
|
||||
#' or `xgb.ExternalDMatrix` are not supported here.
|
||||
#' or `xgb.ExtMemDMatrix` are not supported here.
|
||||
#' @param nrounds The max number of iterations.
|
||||
#' @param nfold The original dataset is randomly partitioned into `nfold` equal size subsamples.
|
||||
#' @param prediction A logical value indicating whether to return the test fold predictions
|
||||
|
||||
@ -26,7 +26,7 @@ to pass here. Supported types are:
|
||||
\itemize{
|
||||
\item \code{matrix}, with types \code{numeric}, \code{integer}, and \code{logical}. Note that for types
|
||||
\code{integer} and \code{logical}, missing values might not be automatically recognized
|
||||
as such - see the documentation for parameter \code{missing} in \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}
|
||||
as such - see the documentation for parameter \code{missing} in \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}
|
||||
for details on this.
|
||||
\item \code{data.frame}, with the same types as supported by 'xgb.DMatrix' and same
|
||||
conversions applied to it. See the documentation for parameter \code{data} in
|
||||
@ -92,7 +92,7 @@ data and parameters passed here. It does \strong{not} inherit from \code{xgb.DMa
|
||||
}
|
||||
\description{
|
||||
Helper function to supply data in batches of a data iterator when
|
||||
constructing a DMatrix from external memory through \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}
|
||||
constructing a DMatrix from external memory through \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}
|
||||
or through \code{\link[=xgb.QuantileDMatrix.from_iterator]{xgb.QuantileDMatrix.from_iterator()}}.
|
||||
|
||||
This function is \strong{only} meant to be called inside of a callback function (which
|
||||
@ -104,8 +104,8 @@ The object that results from calling this function directly is \strong{not} like
|
||||
an \code{xgb.DMatrix} - i.e. cannot be used to train a model, nor to get predictions - only
|
||||
possible usage is to supply data to an iterator, from which a DMatrix is then constructed.
|
||||
|
||||
For more information and for example usage, see the documentation for \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}.
|
||||
For more information and for example usage, see the documentation for \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}.
|
||||
}
|
||||
\seealso{
|
||||
\code{\link[=xgb.DataIter]{xgb.DataIter()}}, \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}.
|
||||
\code{\link[=xgb.DataIter]{xgb.DataIter()}}, \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}.
|
||||
}
|
||||
|
||||
@ -33,7 +33,7 @@ Note that, after resetting the iterator, the batches will be accessed again, so
|
||||
}
|
||||
\value{
|
||||
An \code{xgb.DataIter} object, containing the same inputs supplied here, which can then
|
||||
be passed to \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}.
|
||||
be passed to \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}.
|
||||
}
|
||||
\description{
|
||||
Interface to create a custom data iterator in order to construct a DMatrix
|
||||
@ -42,11 +42,11 @@ from external memory.
|
||||
This function is responsible for generating an R object structure containing callback
|
||||
functions and an environment shared with them.
|
||||
|
||||
The output structure from this function is then meant to be passed to \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}},
|
||||
The output structure from this function is then meant to be passed to \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}},
|
||||
which will consume the data and create a DMatrix from it by executing the callback functions.
|
||||
|
||||
For more information, and for a usage example, see the documentation for \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}.
|
||||
For more information, and for a usage example, see the documentation for \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}.
|
||||
}
|
||||
\seealso{
|
||||
\code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}, \code{\link[=xgb.DataBatch]{xgb.DataBatch()}}.
|
||||
\code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}, \code{\link[=xgb.DataBatch]{xgb.DataBatch()}}.
|
||||
}
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/xgb.DMatrix.R
|
||||
\name{xgb.ExternalDMatrix}
|
||||
\alias{xgb.ExternalDMatrix}
|
||||
\name{xgb.ExtMemDMatrix}
|
||||
\alias{xgb.ExtMemDMatrix}
|
||||
\title{DMatrix from External Data}
|
||||
\usage{
|
||||
xgb.ExternalDMatrix(
|
||||
xgb.ExtMemDMatrix(
|
||||
data_iterator,
|
||||
cache_prefix = tempdir(),
|
||||
missing = NA,
|
||||
@ -26,14 +26,14 @@ it will not be adapted for different input types.
|
||||
|
||||
For example, in R \code{integer} types, missing values are represented by integer number \code{-2147483648}
|
||||
(since machine 'integer' types do not have an inherent 'NA' value) - hence, if one passes \code{NA},
|
||||
which is interpreted as a floating-point NaN by \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}} and by
|
||||
which is interpreted as a floating-point NaN by \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}} and by
|
||||
\code{\link[=xgb.QuantileDMatrix.from_iterator]{xgb.QuantileDMatrix.from_iterator()}}, these integer missing values will not be treated as missing.
|
||||
This should not pose any problem for \code{numeric} types, since they do have an inherent NaN value.}
|
||||
|
||||
\item{nthread}{Number of threads used for creating DMatrix.}
|
||||
}
|
||||
\value{
|
||||
An 'xgb.DMatrix' object, with subclass 'xgb.ExternalDMatrix', in which the data is not
|
||||
An 'xgb.DMatrix' object, with subclass 'xgb.ExtMemDMatrix', in which the data is not
|
||||
held internally but accessed through the iterator when needed.
|
||||
}
|
||||
\description{
|
||||
@ -105,7 +105,7 @@ data_iterator <- xgb.DataIter(
|
||||
cache_prefix <- tempdir()
|
||||
|
||||
# DMatrix will be constructed from the iterator's batches
|
||||
dm <- xgb.ExternalDMatrix(data_iterator, cache_prefix, nthread = 1)
|
||||
dm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1)
|
||||
|
||||
# After construction, can be used as a regular DMatrix
|
||||
params <- list(nthread = 1, objective = "reg:squarederror")
|
||||
@ -25,7 +25,7 @@ it will not be adapted for different input types.
|
||||
|
||||
For example, in R \code{integer} types, missing values are represented by integer number \code{-2147483648}
|
||||
(since machine 'integer' types do not have an inherent 'NA' value) - hence, if one passes \code{NA},
|
||||
which is interpreted as a floating-point NaN by \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}} and by
|
||||
which is interpreted as a floating-point NaN by \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}} and by
|
||||
\code{\link[=xgb.QuantileDMatrix.from_iterator]{xgb.QuantileDMatrix.from_iterator()}}, these integer missing values will not be treated as missing.
|
||||
This should not pose any problem for \code{numeric} types, since they do have an inherent NaN value.}
|
||||
|
||||
@ -48,7 +48,7 @@ An 'xgb.DMatrix' object, with subclass 'xgb.QuantileDMatrix'.
|
||||
Create an \code{xgb.QuantileDMatrix} object (exact same class as would be returned by
|
||||
calling function \code{\link[=xgb.QuantileDMatrix]{xgb.QuantileDMatrix()}}, with the same advantages and limitations) from
|
||||
external data supplied by \code{\link[=xgb.DataIter]{xgb.DataIter()}}, potentially passed in batches from
|
||||
a bigger set that might not fit entirely in memory, same way as \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}.
|
||||
a bigger set that might not fit entirely in memory, same way as \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}.
|
||||
|
||||
Note that, while external data will only be loaded through the iterator (thus the full data
|
||||
might not be held entirely in-memory), the quantized representation of the data will get
|
||||
@ -60,6 +60,6 @@ For more information, see the guide 'Using XGBoost External Memory Version':
|
||||
\url{https://xgboost.readthedocs.io/en/stable/tutorials/external_memory.html}
|
||||
}
|
||||
\seealso{
|
||||
\code{\link[=xgb.DataIter]{xgb.DataIter()}}, \code{\link[=xgb.DataBatch]{xgb.DataBatch()}}, \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}},
|
||||
\code{\link[=xgb.DataIter]{xgb.DataIter()}}, \code{\link[=xgb.DataBatch]{xgb.DataBatch()}}, \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}},
|
||||
\code{\link[=xgb.QuantileDMatrix]{xgb.QuantileDMatrix()}}
|
||||
}
|
||||
|
||||
@ -53,7 +53,7 @@ system - thus, for reproducible results, one needs to call the \code{\link[=set.
|
||||
for model training by the objective.
|
||||
|
||||
Note that only the basic \code{xgb.DMatrix} class is supported - variants such as \code{xgb.QuantileDMatrix}
|
||||
or \code{xgb.ExternalDMatrix} are not supported here.}
|
||||
or \code{xgb.ExtMemDMatrix} are not supported here.}
|
||||
|
||||
\item{nrounds}{The max number of iterations.}
|
||||
|
||||
|
||||
@ -478,7 +478,7 @@ test_that("xgb.DMatrix: QuantileDMatrix is not accepted by exact method", {
|
||||
})
|
||||
})
|
||||
|
||||
test_that("xgb.DMatrix: ExternalDMatrix produces the same results as regular DMatrix", {
|
||||
test_that("xgb.DMatrix: ExtMemDMatrix produces the same results as regular DMatrix", {
|
||||
data(mtcars)
|
||||
y <- mtcars[, 1]
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
@ -528,8 +528,8 @@ test_that("xgb.DMatrix: ExternalDMatrix produces the same results as regular DMa
|
||||
f_reset = iterator_reset
|
||||
)
|
||||
cache_prefix <- tempdir()
|
||||
edm <- xgb.ExternalDMatrix(data_iterator, cache_prefix, nthread = 1)
|
||||
expect_true(inherits(edm, "xgb.ExternalDMatrix"))
|
||||
edm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1)
|
||||
expect_true(inherits(edm, "xgb.ExtMemDMatrix"))
|
||||
expect_true(inherits(edm, "xgb.DMatrix"))
|
||||
set.seed(123)
|
||||
model_ext <- xgb.train(
|
||||
@ -660,7 +660,7 @@ test_that("xgb.DMatrix: R errors thrown on DataIterator are thrown back to the u
|
||||
f_reset = iterator_reset
|
||||
)
|
||||
expect_error(
|
||||
{xgb.ExternalDMatrix(data_iterator, nthread = 1)},
|
||||
{xgb.ExtMemDMatrix(data_iterator, nthread = 1)},
|
||||
"custom error"
|
||||
)
|
||||
})
|
||||
|
||||
@ -37,7 +37,10 @@ def pack_rpackage() -> Path:
|
||||
output = subprocess.run(["git", "clean", "-xdf", "--dry-run"], capture_output=True)
|
||||
if output.returncode != 0:
|
||||
raise ValueError("Failed to check git repository status.", output)
|
||||
would_remove = output.stdout.decode("utf-8").strip().split("\n")
|
||||
if len(output.stdout) == 0:
|
||||
would_remove = None
|
||||
else:
|
||||
would_remove = output.stdout.decode("utf-8").strip().split("\n")
|
||||
|
||||
if would_remove and not all(f.find("tests/ci_build") != -1 for f in would_remove):
|
||||
raise ValueError(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user