[R] Remove enable_categorical parameter (#10018)
This commit is contained in:
parent
3abbbe41ac
commit
df7cf744b4
@ -16,10 +16,6 @@
|
||||
#' \item `matrix` objects, with types `numeric`, `integer`, or `logical`.
|
||||
#' \item `data.frame` objects, with columns of types `numeric`, `integer`, `logical`, or `factor`.
|
||||
#'
|
||||
#' If passing `enable_categorical=TRUE`, columns with `factor` type will be treated as categorical.
|
||||
#' Otherwise, if passing `enable_categorical=FALSE` and the data contains `factor` columns, an error
|
||||
#' will be thrown.
|
||||
#'
|
||||
#' Note that xgboost uses base-0 encoding for categorical types, hence `factor` types (which use base-1
|
||||
#' encoding) will be converted inside the function call. Be aware that the encoding used for `factor`
|
||||
#' types is not kept as part of the model, so in subsequent calls to `predict`, it is the user's
|
||||
@ -59,7 +55,7 @@
|
||||
#' must be the same as in the DMatrix construction, regardless of the column names.
|
||||
#' @param feature_types Set types for features.
|
||||
#'
|
||||
#' If `data` is a `data.frame` and passing `enable_categorical=TRUE`, the types will be deduced
|
||||
#' If `data` is a `data.frame` and `feature_types` is not supplied, feature types will be deduced
|
||||
#' automatically from the column types.
|
||||
#'
|
||||
#' Otherwise, one can pass a character vector with the same length as number of columns in `data`,
|
||||
@ -79,18 +75,6 @@
|
||||
#' @param label_lower_bound Lower bound for survival training.
|
||||
#' @param label_upper_bound Upper bound for survival training.
|
||||
#' @param feature_weights Set feature weights for column sampling.
|
||||
#' @param enable_categorical Experimental support of specializing for categorical features.
|
||||
#'
|
||||
#' If passing 'TRUE' and 'data' is a data frame,
|
||||
#' columns of categorical types will automatically
|
||||
#' be set to be of categorical type (feature_type='c') in the resulting DMatrix.
|
||||
#'
|
||||
#' If passing 'FALSE' and 'data' is a data frame with categorical columns,
|
||||
#' it will result in an error being thrown.
|
||||
#'
|
||||
#' If 'data' is not a data frame, this argument is ignored.
|
||||
#'
|
||||
#' JSON/UBJSON serialization format is required for this.
|
||||
#' @return An 'xgb.DMatrix' object. If calling 'xgb.QuantileDMatrix', it will have additional
|
||||
#' subclass 'xgb.QuantileDMatrix'.
|
||||
#'
|
||||
@ -127,8 +111,7 @@ xgb.DMatrix <- function(
|
||||
qid = NULL,
|
||||
label_lower_bound = NULL,
|
||||
label_upper_bound = NULL,
|
||||
feature_weights = NULL,
|
||||
enable_categorical = FALSE
|
||||
feature_weights = NULL
|
||||
) {
|
||||
if (!is.null(group) && !is.null(qid)) {
|
||||
stop("Either one of 'group' or 'qid' should be NULL")
|
||||
@ -180,7 +163,7 @@ xgb.DMatrix <- function(
|
||||
nthread
|
||||
)
|
||||
} else if (is.data.frame(data)) {
|
||||
tmp <- .process.df.for.dmatrix(data, enable_categorical, feature_types)
|
||||
tmp <- .process.df.for.dmatrix(data, feature_types)
|
||||
feature_types <- tmp$feature_types
|
||||
handle <- .Call(
|
||||
XGDMatrixCreateFromDF_R, tmp$lst, missing, nthread
|
||||
@ -212,7 +195,7 @@ xgb.DMatrix <- function(
|
||||
return(dmat)
|
||||
}
|
||||
|
||||
.process.df.for.dmatrix <- function(df, enable_categorical, feature_types) {
|
||||
.process.df.for.dmatrix <- function(df, feature_types) {
|
||||
if (!nrow(df) || !ncol(df)) {
|
||||
stop("'data' is an empty data.frame.")
|
||||
}
|
||||
@ -225,12 +208,6 @@ xgb.DMatrix <- function(
|
||||
} else {
|
||||
feature_types <- sapply(df, function(col) {
|
||||
if (is.factor(col)) {
|
||||
if (!enable_categorical) {
|
||||
stop(
|
||||
"When factor type is used, the parameter `enable_categorical`",
|
||||
" must be set to TRUE."
|
||||
)
|
||||
}
|
||||
return("c")
|
||||
} else if (is.integer(col)) {
|
||||
return("int")
|
||||
@ -326,7 +303,6 @@ xgb.QuantileDMatrix <- function(
|
||||
label_lower_bound = NULL,
|
||||
label_upper_bound = NULL,
|
||||
feature_weights = NULL,
|
||||
enable_categorical = FALSE,
|
||||
ref = NULL,
|
||||
max_bin = NULL
|
||||
) {
|
||||
@ -357,8 +333,7 @@ xgb.QuantileDMatrix <- function(
|
||||
qid = qid,
|
||||
label_lower_bound = label_lower_bound,
|
||||
label_upper_bound = label_upper_bound,
|
||||
feature_weights = feature_weights,
|
||||
enable_categorical = enable_categorical
|
||||
feature_weights = feature_weights
|
||||
)
|
||||
)
|
||||
data_iterator <- .single.data.iterator(iterator_env)
|
||||
@ -470,8 +445,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
|
||||
qid = env[["qid"]],
|
||||
label_lower_bound = env[["label_lower_bound"]],
|
||||
label_upper_bound = env[["label_upper_bound"]],
|
||||
feature_weights = env[["feature_weights"]],
|
||||
enable_categorical = env[["enable_categorical"]]
|
||||
feature_weights = env[["feature_weights"]]
|
||||
)
|
||||
)
|
||||
}
|
||||
@ -540,8 +514,7 @@ xgb.ProxyDMatrix <- function(
|
||||
qid = NULL,
|
||||
label_lower_bound = NULL,
|
||||
label_upper_bound = NULL,
|
||||
feature_weights = NULL,
|
||||
enable_categorical = FALSE
|
||||
feature_weights = NULL
|
||||
) {
|
||||
stopifnot(inherits(data, c("matrix", "data.frame", "dgRMatrix")))
|
||||
out <- list(
|
||||
@ -555,8 +528,7 @@ xgb.ProxyDMatrix <- function(
|
||||
qid = qid,
|
||||
label_lower_bound = label_lower_bound,
|
||||
label_upper_bound = label_upper_bound,
|
||||
feature_weights = feature_weights,
|
||||
enable_categorical = enable_categorical
|
||||
feature_weights = feature_weights
|
||||
)
|
||||
class(out) <- "xgb.ProxyDMatrix"
|
||||
return(out)
|
||||
@ -575,7 +547,7 @@ xgb.ProxyDMatrix.internal <- function(proxy_handle, data_iterator) {
|
||||
stop("Either one of 'group' or 'qid' should be NULL")
|
||||
}
|
||||
if (is.data.frame(lst$data)) {
|
||||
tmp <- .process.df.for.dmatrix(lst$data, lst$enable_categorical, lst$feature_types)
|
||||
tmp <- .process.df.for.dmatrix(lst$data, lst$feature_types)
|
||||
lst$feature_types <- tmp$feature_types
|
||||
.Call(XGProxyDMatrixSetDataColumnar_R, proxy_handle, tmp$lst)
|
||||
rm(tmp)
|
||||
|
||||
@ -19,8 +19,7 @@ xgb.DMatrix(
|
||||
qid = NULL,
|
||||
label_lower_bound = NULL,
|
||||
label_upper_bound = NULL,
|
||||
feature_weights = NULL,
|
||||
enable_categorical = FALSE
|
||||
feature_weights = NULL
|
||||
)
|
||||
|
||||
xgb.QuantileDMatrix(
|
||||
@ -37,7 +36,6 @@ xgb.QuantileDMatrix(
|
||||
label_lower_bound = NULL,
|
||||
label_upper_bound = NULL,
|
||||
feature_weights = NULL,
|
||||
enable_categorical = FALSE,
|
||||
ref = NULL,
|
||||
max_bin = NULL
|
||||
)
|
||||
@ -50,10 +48,6 @@ Supported input types are as follows:\itemize{
|
||||
\item \code{matrix} objects, with types \code{numeric}, \code{integer}, or \code{logical}.
|
||||
\item \code{data.frame} objects, with columns of types \code{numeric}, \code{integer}, \code{logical}, or \code{factor}.
|
||||
|
||||
If passing \code{enable_categorical=TRUE}, columns with \code{factor} type will be treated as categorical.
|
||||
Otherwise, if passing \code{enable_categorical=FALSE} and the data contains \code{factor} columns, an error
|
||||
will be thrown.
|
||||
|
||||
Note that xgboost uses base-0 encoding for categorical types, hence \code{factor} types (which use base-1
|
||||
encoding) will be converted inside the function call. Be aware that the encoding used for \code{factor}
|
||||
types is not kept as part of the model, so in subsequent calls to \code{predict}, it is the user's
|
||||
@ -102,7 +96,7 @@ frame and matrix.
|
||||
|
||||
\item{feature_types}{Set types for features.
|
||||
|
||||
If \code{data} is a \code{data.frame} and passing \code{enable_categorical=TRUE}, the types will be deduced
|
||||
If \code{data} is a \code{data.frame} and \code{feature_types} is not supplied, feature types will be deduced
|
||||
automatically from the column types.
|
||||
|
||||
Otherwise, one can pass a character vector with the same length as number of columns in \code{data},
|
||||
@ -129,20 +123,6 @@ functionalities such as feature importances.}
|
||||
|
||||
\item{feature_weights}{Set feature weights for column sampling.}
|
||||
|
||||
\item{enable_categorical}{Experimental support of specializing for categorical features.
|
||||
|
||||
\if{html}{\out{<div class="sourceCode">}}\preformatted{ If passing 'TRUE' and 'data' is a data frame,
|
||||
columns of categorical types will automatically
|
||||
be set to be of categorical type (feature_type='c') in the resulting DMatrix.
|
||||
|
||||
If passing 'FALSE' and 'data' is a data frame with categorical columns,
|
||||
it will result in an error being thrown.
|
||||
|
||||
If 'data' is not a data frame, this argument is ignored.
|
||||
|
||||
JSON/UBJSON serialization format is required for this.
|
||||
}\if{html}{\out{</div>}}}
|
||||
|
||||
\item{ref}{The training dataset that provides quantile information, needed when creating
|
||||
validation/test dataset with \code{xgb.QuantileDMatrix}. Supplying the training DMatrix
|
||||
as a reference means that the same quantisation applied to the training data is
|
||||
|
||||
@ -15,8 +15,7 @@ xgb.ProxyDMatrix(
|
||||
qid = NULL,
|
||||
label_lower_bound = NULL,
|
||||
label_upper_bound = NULL,
|
||||
feature_weights = NULL,
|
||||
enable_categorical = FALSE
|
||||
feature_weights = NULL
|
||||
)
|
||||
}
|
||||
\arguments{
|
||||
@ -57,7 +56,7 @@ frame and matrix.
|
||||
|
||||
\item{feature_types}{Set types for features.
|
||||
|
||||
If \code{data} is a \code{data.frame} and passing \code{enable_categorical=TRUE}, the types will be deduced
|
||||
If \code{data} is a \code{data.frame} and \code{feature_types} is not supplied, feature types will be deduced
|
||||
automatically from the column types.
|
||||
|
||||
Otherwise, one can pass a character vector with the same length as number of columns in \code{data},
|
||||
@ -81,20 +80,6 @@ functionalities such as feature importances.}
|
||||
\item{label_upper_bound}{Upper bound for survival training.}
|
||||
|
||||
\item{feature_weights}{Set feature weights for column sampling.}
|
||||
|
||||
\item{enable_categorical}{Experimental support of specializing for categorical features.
|
||||
|
||||
\if{html}{\out{<div class="sourceCode">}}\preformatted{ If passing 'TRUE' and 'data' is a data frame,
|
||||
columns of categorical types will automatically
|
||||
be set to be of categorical type (feature_type='c') in the resulting DMatrix.
|
||||
|
||||
If passing 'FALSE' and 'data' is a data frame with categorical columns,
|
||||
it will result in an error being thrown.
|
||||
|
||||
If 'data' is not a data frame, this argument is ignored.
|
||||
|
||||
JSON/UBJSON serialization format is required for this.
|
||||
}\if{html}{\out{</div>}}}
|
||||
}
|
||||
\value{
|
||||
An object of class \code{xgb.ProxyDMatrix}, which is just a list containing the
|
||||
|
||||
@ -338,19 +338,18 @@ test_that("xgb.DMatrix: data.frame", {
|
||||
stringsAsFactors = TRUE
|
||||
)
|
||||
|
||||
m <- xgb.DMatrix(df, enable_categorical = TRUE)
|
||||
m <- xgb.DMatrix(df)
|
||||
expect_equal(colnames(m), colnames(df))
|
||||
expect_equal(
|
||||
getinfo(m, "feature_type"), c("float", "float", "int", "i", "c", "c")
|
||||
)
|
||||
expect_error(xgb.DMatrix(df, enable_categorical = FALSE))
|
||||
|
||||
df <- data.frame(
|
||||
missing = c("a", "b", "d", NA),
|
||||
valid = c("a", "b", "d", "c"),
|
||||
stringsAsFactors = TRUE
|
||||
)
|
||||
m <- xgb.DMatrix(df, enable_categorical = TRUE)
|
||||
m <- xgb.DMatrix(df)
|
||||
expect_equal(getinfo(m, "feature_type"), c("c", "c"))
|
||||
})
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user