diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index f6cc9062c..035f4ae45 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -29,7 +29,7 @@ export(xgb.DMatrix.hasinfo) export(xgb.DMatrix.save) export(xgb.DataBatch) export(xgb.DataIter) -export(xgb.ExternalDMatrix) +export(xgb.ExtMemDMatrix) export(xgb.QuantileDMatrix) export(xgb.QuantileDMatrix.from_iterator) export(xgb.attr) diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index e0753412d..429cf3f04 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -418,10 +418,10 @@ xgb.QuantileDMatrix <- function( #' This function is responsible for generating an R object structure containing callback #' functions and an environment shared with them. #' -#' The output structure from this function is then meant to be passed to [xgb.ExternalDMatrix()], +#' The output structure from this function is then meant to be passed to [xgb.ExtMemDMatrix()], #' which will consume the data and create a DMatrix from it by executing the callback functions. #' -#' For more information, and for a usage example, see the documentation for [xgb.ExternalDMatrix()]. +#' For more information, and for a usage example, see the documentation for [xgb.ExtMemDMatrix()]. #' #' @param env An R environment to pass to the callback functions supplied here, which can be #' used to keep track of variables to determine how to handle the batches. @@ -443,8 +443,8 @@ xgb.QuantileDMatrix <- function( #' Note that, after resetting the iterator, the batches will be accessed again, so the same data #' (and in the same order) must be passed in subsequent iterations. #' @return An `xgb.DataIter` object, containing the same inputs supplied here, which can then -#' be passed to [xgb.ExternalDMatrix()]. -#' @seealso [xgb.ExternalDMatrix()], [xgb.DataBatch()]. +#' be passed to [xgb.ExtMemDMatrix()]. +#' @seealso [xgb.ExtMemDMatrix()], [xgb.DataBatch()]. #' @export xgb.DataIter <- function(env = new.env(), f_next, f_reset) { if (!is.function(f_next)) { @@ -512,7 +512,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) { #' #' @description #' Helper function to supply data in batches of a data iterator when -#' constructing a DMatrix from external memory through [xgb.ExternalDMatrix()] +#' constructing a DMatrix from external memory through [xgb.ExtMemDMatrix()] #' or through [xgb.QuantileDMatrix.from_iterator()]. #' #' This function is **only** meant to be called inside of a callback function (which @@ -524,7 +524,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) { #' an `xgb.DMatrix` - i.e. cannot be used to train a model, nor to get predictions - only #' possible usage is to supply data to an iterator, from which a DMatrix is then constructed. #' -#' For more information and for example usage, see the documentation for [xgb.ExternalDMatrix()]. +#' For more information and for example usage, see the documentation for [xgb.ExtMemDMatrix()]. #' @inheritParams xgb.DMatrix #' @param data Batch of data belonging to this batch. #' @@ -532,7 +532,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) { #' to pass here. Supported types are: #' - `matrix`, with types `numeric`, `integer`, and `logical`. Note that for types #' `integer` and `logical`, missing values might not be automatically recognized as -#' as such - see the documentation for parameter `missing` in [xgb.ExternalDMatrix()] +#' as such - see the documentation for parameter `missing` in [xgb.ExtMemDMatrix()] #' for details on this. #' - `data.frame`, with the same types as supported by 'xgb.DMatrix' and same #' conversions applied to it. See the documentation for parameter `data` in @@ -540,7 +540,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) { #' - CSR matrices, as class `dgRMatrix` from package "Matrix". #' @return An object of class `xgb.DataBatch`, which is just a list containing the #' data and parameters passed here. It does **not** inherit from `xgb.DMatrix`. -#' @seealso [xgb.DataIter()], [xgb.ExternalDMatrix()]. +#' @seealso [xgb.DataIter()], [xgb.ExtMemDMatrix()]. #' @export xgb.DataBatch <- function( data, @@ -643,10 +643,10 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) { #' #' For example, in R `integer` types, missing values are represented by integer number `-2147483648` #' (since machine 'integer' types do not have an inherent 'NA' value) - hence, if one passes `NA`, -#' which is interpreted as a floating-point NaN by [xgb.ExternalDMatrix()] and by +#' which is interpreted as a floating-point NaN by [xgb.ExtMemDMatrix()] and by #' [xgb.QuantileDMatrix.from_iterator()], these integer missing values will not be treated as missing. #' This should not pose any problem for `numeric` types, since they do have an inheret NaN value. -#' @return An 'xgb.DMatrix' object, with subclass 'xgb.ExternalDMatrix', in which the data is not +#' @return An 'xgb.DMatrix' object, with subclass 'xgb.ExtMemDMatrix', in which the data is not #' held internally but accessed through the iterator when needed. #' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.QuantileDMatrix.from_iterator()] #' @examples @@ -706,7 +706,7 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) { #' cache_prefix <- tempdir() #' #' # DMatrix will be constructed from the iterator's batches -#' dm <- xgb.ExternalDMatrix(data_iterator, cache_prefix, nthread = 1) +#' dm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1) #' #' # After construction, can be used as a regular DMatrix #' params <- list(nthread = 1, objective = "reg:squarederror") @@ -717,7 +717,7 @@ xgb.ProxyDMatrix <- function(proxy_handle, data_iterator) { #' pred_dm <- predict(model, dm) #' pred_mat <- predict(model, as.matrix(mtcars[, -1])) #' @export -xgb.ExternalDMatrix <- function( +xgb.ExtMemDMatrix <- function( data_iterator, cache_prefix = tempdir(), missing = NA, @@ -753,7 +753,7 @@ xgb.ExternalDMatrix <- function( ) attributes(dmat) <- list( - class = c("xgb.DMatrix", "xgb.ExternalDMatrix"), + class = c("xgb.DMatrix", "xgb.ExtMemDMatrix"), fields = attributes(proxy_handle)$fields ) return(dmat) @@ -766,7 +766,7 @@ xgb.ExternalDMatrix <- function( #' Create an `xgb.QuantileDMatrix` object (exact same class as would be returned by #' calling function [xgb.QuantileDMatrix()], with the same advantages and limitations) from #' external data supplied by [xgb.DataIter()], potentially passed in batches from -#' a bigger set that might not fit entirely in memory, same way as [xgb.ExternalDMatrix()]. +#' a bigger set that might not fit entirely in memory, same way as [xgb.ExtMemDMatrix()]. #' #' Note that, while external data will only be loaded through the iterator (thus the full data #' might not be held entirely in-memory), the quantized representation of the data will get @@ -776,10 +776,10 @@ xgb.ExternalDMatrix <- function( #' #' For more information, see the guide 'Using XGBoost External Memory Version': #' \url{https://xgboost.readthedocs.io/en/stable/tutorials/external_memory.html} -#' @inheritParams xgb.ExternalDMatrix +#' @inheritParams xgb.ExtMemDMatrix #' @inheritParams xgb.QuantileDMatrix #' @return An 'xgb.DMatrix' object, with subclass 'xgb.QuantileDMatrix'. -#' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.ExternalDMatrix()], +#' @seealso [xgb.DataIter()], [xgb.DataBatch()], [xgb.ExtMemDMatrix()], #' [xgb.QuantileDMatrix()] #' @export xgb.QuantileDMatrix.from_iterator <- function( # nolint @@ -1318,8 +1318,8 @@ print.xgb.DMatrix <- function(x, verbose = FALSE, ...) { } class_print <- if (inherits(x, "xgb.QuantileDMatrix")) { "xgb.QuantileDMatrix" - } else if (inherits(x, "xgb.ExternalDMatrix")) { - "xgb.ExternalDMatrix" + } else if (inherits(x, "xgb.ExtMemDMatrix")) { + "xgb.ExtMemDMatrix" } else if (inherits(x, "xgb.ProxyDMatrix")) { "xgb.ProxyDMatrix" } else { diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index cb74631a4..67821919f 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -24,7 +24,7 @@ #' for model training by the objective. #' #' Note that only the basic `xgb.DMatrix` class is supported - variants such as `xgb.QuantileDMatrix` -#' or `xgb.ExternalDMatrix` are not supported here. +#' or `xgb.ExtMemDMatrix` are not supported here. #' @param nrounds The max number of iterations. #' @param nfold The original dataset is randomly partitioned into `nfold` equal size subsamples. #' @param prediction A logical value indicating whether to return the test fold predictions diff --git a/R-package/man/xgb.DataBatch.Rd b/R-package/man/xgb.DataBatch.Rd index 46795af20..8b737b21b 100644 --- a/R-package/man/xgb.DataBatch.Rd +++ b/R-package/man/xgb.DataBatch.Rd @@ -26,7 +26,7 @@ to pass here. Supported types are: \itemize{ \item \code{matrix}, with types \code{numeric}, \code{integer}, and \code{logical}. Note that for types \code{integer} and \code{logical}, missing values might not be automatically recognized as -as such - see the documentation for parameter \code{missing} in \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}} +as such - see the documentation for parameter \code{missing} in \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}} for details on this. \item \code{data.frame}, with the same types as supported by 'xgb.DMatrix' and same conversions applied to it. See the documentation for parameter \code{data} in @@ -92,7 +92,7 @@ data and parameters passed here. It does \strong{not} inherit from \code{xgb.DMa } \description{ Helper function to supply data in batches of a data iterator when -constructing a DMatrix from external memory through \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}} +constructing a DMatrix from external memory through \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}} or through \code{\link[=xgb.QuantileDMatrix.from_iterator]{xgb.QuantileDMatrix.from_iterator()}}. This function is \strong{only} meant to be called inside of a callback function (which @@ -104,8 +104,8 @@ The object that results from calling this function directly is \strong{not} like an \code{xgb.DMatrix} - i.e. cannot be used to train a model, nor to get predictions - only possible usage is to supply data to an iterator, from which a DMatrix is then constructed. -For more information and for example usage, see the documentation for \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}. +For more information and for example usage, see the documentation for \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}. } \seealso{ -\code{\link[=xgb.DataIter]{xgb.DataIter()}}, \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}. +\code{\link[=xgb.DataIter]{xgb.DataIter()}}, \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}. } diff --git a/R-package/man/xgb.DataIter.Rd b/R-package/man/xgb.DataIter.Rd index bdbcf63ac..21dd12c9f 100644 --- a/R-package/man/xgb.DataIter.Rd +++ b/R-package/man/xgb.DataIter.Rd @@ -33,7 +33,7 @@ Note that, after resetting the iterator, the batches will be accessed again, so } \value{ An \code{xgb.DataIter} object, containing the same inputs supplied here, which can then -be passed to \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}. +be passed to \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}. } \description{ Interface to create a custom data iterator in order to construct a DMatrix @@ -42,11 +42,11 @@ from external memory. This function is responsible for generating an R object structure containing callback functions and an environment shared with them. -The output structure from this function is then meant to be passed to \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}, +The output structure from this function is then meant to be passed to \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}, which will consume the data and create a DMatrix from it by executing the callback functions. -For more information, and for a usage example, see the documentation for \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}. +For more information, and for a usage example, see the documentation for \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}. } \seealso{ -\code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}, \code{\link[=xgb.DataBatch]{xgb.DataBatch()}}. +\code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}, \code{\link[=xgb.DataBatch]{xgb.DataBatch()}}. } diff --git a/R-package/man/xgb.ExternalDMatrix.Rd b/R-package/man/xgb.ExtMemDMatrix.Rd similarity index 94% rename from R-package/man/xgb.ExternalDMatrix.Rd rename to R-package/man/xgb.ExtMemDMatrix.Rd index bbf733715..a4555f571 100644 --- a/R-package/man/xgb.ExternalDMatrix.Rd +++ b/R-package/man/xgb.ExtMemDMatrix.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/xgb.DMatrix.R -\name{xgb.ExternalDMatrix} -\alias{xgb.ExternalDMatrix} +\name{xgb.ExtMemDMatrix} +\alias{xgb.ExtMemDMatrix} \title{DMatrix from External Data} \usage{ -xgb.ExternalDMatrix( +xgb.ExtMemDMatrix( data_iterator, cache_prefix = tempdir(), missing = NA, @@ -26,14 +26,14 @@ it will not be adapted for different input types. For example, in R \code{integer} types, missing values are represented by integer number \code{-2147483648} (since machine 'integer' types do not have an inherent 'NA' value) - hence, if one passes \code{NA}, -which is interpreted as a floating-point NaN by \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}} and by +which is interpreted as a floating-point NaN by \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}} and by \code{\link[=xgb.QuantileDMatrix.from_iterator]{xgb.QuantileDMatrix.from_iterator()}}, these integer missing values will not be treated as missing. This should not pose any problem for \code{numeric} types, since they do have an inheret NaN value.} \item{nthread}{Number of threads used for creating DMatrix.} } \value{ -An 'xgb.DMatrix' object, with subclass 'xgb.ExternalDMatrix', in which the data is not +An 'xgb.DMatrix' object, with subclass 'xgb.ExtMemDMatrix', in which the data is not held internally but accessed through the iterator when needed. } \description{ @@ -105,7 +105,7 @@ data_iterator <- xgb.DataIter( cache_prefix <- tempdir() # DMatrix will be constructed from the iterator's batches -dm <- xgb.ExternalDMatrix(data_iterator, cache_prefix, nthread = 1) +dm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1) # After construction, can be used as a regular DMatrix params <- list(nthread = 1, objective = "reg:squarederror") diff --git a/R-package/man/xgb.QuantileDMatrix.from_iterator.Rd b/R-package/man/xgb.QuantileDMatrix.from_iterator.Rd index 7903159e1..c401d034b 100644 --- a/R-package/man/xgb.QuantileDMatrix.from_iterator.Rd +++ b/R-package/man/xgb.QuantileDMatrix.from_iterator.Rd @@ -25,7 +25,7 @@ it will not be adapted for different input types. For example, in R \code{integer} types, missing values are represented by integer number \code{-2147483648} (since machine 'integer' types do not have an inherent 'NA' value) - hence, if one passes \code{NA}, -which is interpreted as a floating-point NaN by \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}} and by +which is interpreted as a floating-point NaN by \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}} and by \code{\link[=xgb.QuantileDMatrix.from_iterator]{xgb.QuantileDMatrix.from_iterator()}}, these integer missing values will not be treated as missing. This should not pose any problem for \code{numeric} types, since they do have an inheret NaN value.} @@ -48,7 +48,7 @@ An 'xgb.DMatrix' object, with subclass 'xgb.QuantileDMatrix'. Create an \code{xgb.QuantileDMatrix} object (exact same class as would be returned by calling function \code{\link[=xgb.QuantileDMatrix]{xgb.QuantileDMatrix()}}, with the same advantages and limitations) from external data supplied by \code{\link[=xgb.DataIter]{xgb.DataIter()}}, potentially passed in batches from -a bigger set that might not fit entirely in memory, same way as \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}. +a bigger set that might not fit entirely in memory, same way as \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}. Note that, while external data will only be loaded through the iterator (thus the full data might not be held entirely in-memory), the quantized representation of the data will get @@ -60,6 +60,6 @@ For more information, see the guide 'Using XGBoost External Memory Version': \url{https://xgboost.readthedocs.io/en/stable/tutorials/external_memory.html} } \seealso{ -\code{\link[=xgb.DataIter]{xgb.DataIter()}}, \code{\link[=xgb.DataBatch]{xgb.DataBatch()}}, \code{\link[=xgb.ExternalDMatrix]{xgb.ExternalDMatrix()}}, +\code{\link[=xgb.DataIter]{xgb.DataIter()}}, \code{\link[=xgb.DataBatch]{xgb.DataBatch()}}, \code{\link[=xgb.ExtMemDMatrix]{xgb.ExtMemDMatrix()}}, \code{\link[=xgb.QuantileDMatrix]{xgb.QuantileDMatrix()}} } diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index 5a3820e4d..c5686e201 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -53,7 +53,7 @@ system - thus, for reproducible results, one needs to call the \code{\link[=set. for model training by the objective. Note that only the basic \code{xgb.DMatrix} class is supported - variants such as \code{xgb.QuantileDMatrix} -or \code{xgb.ExternalDMatrix} are not supported here.} +or \code{xgb.ExtMemDMatrix} are not supported here.} \item{nrounds}{The max number of iterations.} diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R index 887f602be..6aa8cda4b 100644 --- a/R-package/tests/testthat/test_dmatrix.R +++ b/R-package/tests/testthat/test_dmatrix.R @@ -478,7 +478,7 @@ test_that("xgb.DMatrix: QuantileDMatrix is not accepted by exact method", { }) }) -test_that("xgb.DMatrix: ExternalDMatrix produces the same results as regular DMatrix", { +test_that("xgb.DMatrix: ExtMemDMatrix produces the same results as regular DMatrix", { data(mtcars) y <- mtcars[, 1] x <- as.matrix(mtcars[, -1]) @@ -528,8 +528,8 @@ test_that("xgb.DMatrix: ExternalDMatrix produces the same results as regular DMa f_reset = iterator_reset ) cache_prefix <- tempdir() - edm <- xgb.ExternalDMatrix(data_iterator, cache_prefix, nthread = 1) - expect_true(inherits(edm, "xgb.ExternalDMatrix")) + edm <- xgb.ExtMemDMatrix(data_iterator, cache_prefix, nthread = 1) + expect_true(inherits(edm, "xgb.ExtMemDMatrix")) expect_true(inherits(edm, "xgb.DMatrix")) set.seed(123) model_ext <- xgb.train( @@ -660,7 +660,7 @@ test_that("xgb.DMatrix: R errors thrown on DataIterator are thrown back to the u f_reset = iterator_reset ) expect_error( - {xgb.ExternalDMatrix(data_iterator, nthread = 1)}, + {xgb.ExtMemDMatrix(data_iterator, nthread = 1)}, "custom error" ) }) diff --git a/tests/ci_build/test_r_package.py b/tests/ci_build/test_r_package.py index 735140a80..5ca7fa69b 100644 --- a/tests/ci_build/test_r_package.py +++ b/tests/ci_build/test_r_package.py @@ -37,7 +37,10 @@ def pack_rpackage() -> Path: output = subprocess.run(["git", "clean", "-xdf", "--dry-run"], capture_output=True) if output.returncode != 0: raise ValueError("Failed to check git repository status.", output) - would_remove = output.stdout.decode("utf-8").strip().split("\n") + if len(output.stdout) == 0: + would_remove = None + else: + would_remove = output.stdout.decode("utf-8").strip().split("\n") if would_remove and not all(f.find("tests/ci_build") != -1 for f in would_remove): raise ValueError(