[R] Make xgb.cv work with xgb.DMatrix only, adding support for survival and ranking fields (#10031)
--------- Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
@@ -334,7 +334,7 @@ test_that("xgb.cv works", {
|
||||
set.seed(11)
|
||||
expect_output(
|
||||
cv <- xgb.cv(
|
||||
data = train$data, label = train$label, max_depth = 2, nfold = 5,
|
||||
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, nfold = 5,
|
||||
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
eval_metric = "error", verbose = TRUE
|
||||
),
|
||||
@@ -357,13 +357,13 @@ test_that("xgb.cv works with stratified folds", {
|
||||
cv <- xgb.cv(
|
||||
data = dtrain, max_depth = 2, nfold = 5,
|
||||
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
verbose = TRUE, stratified = FALSE
|
||||
verbose = FALSE, stratified = FALSE
|
||||
)
|
||||
set.seed(314159)
|
||||
cv2 <- xgb.cv(
|
||||
data = dtrain, max_depth = 2, nfold = 5,
|
||||
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
|
||||
verbose = TRUE, stratified = TRUE
|
||||
verbose = FALSE, stratified = TRUE
|
||||
)
|
||||
# Stratified folds should result in different evaluation logs
|
||||
expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
|
||||
@@ -885,3 +885,57 @@ test_that("Seed in params override PRNG from R", {
|
||||
)
|
||||
)
|
||||
})
|
||||
|
||||
test_that("xgb.cv works for AFT", {
  # Cross-validation with the 'survival:aft' objective: the call is expected
  # to fail while the DMatrix carries no label bounds, to warn when
  # stratification is requested explicitly, and to run without warnings
  # when `stratified` is not passed.
  X <- matrix(c(1, -1, -1, 1, 0, 1, 1, 0), nrow = 4, byrow = TRUE) # 4x2 matrix
  dtrain <- xgb.DMatrix(X, nthread = n_threads)

  params <- list(
    objective = "survival:aft",
    learning_rate = 0.2,
    max_depth = 2L
  )

  # data must have bounds
  # NOTE: `nrounds` is spelled out in full instead of relying on partial
  # matching of `nround`, so this bare expect_error() cannot be satisfied
  # by an argument-matching problem.
  expect_error(
    xgb.cv(
      params = params,
      data = dtrain,
      nrounds = 5L,
      nfold = 4L,
      nthread = n_threads
    )
  )

  # Attach interval bounds; the second interval is open-ended (upper = Inf).
  setinfo(dtrain, "label_lower_bound", c(2, 3, 0, 4))
  setinfo(dtrain, "label_upper_bound", c(2, Inf, 4, 5))

  # automatic stratified splitting is turned off
  expect_warning(
    xgb.cv(
      params = params, data = dtrain, nrounds = 5L, nfold = 4L,
      nthread = n_threads, stratified = TRUE, verbose = FALSE
    )
  )

  # this works without any issue
  expect_no_warning(
    xgb.cv(
      params = params,
      data = dtrain,
      nrounds = 5L,
      nfold = 4L,
      verbose = FALSE
    )
  )
})
|
||||
|
||||
test_that("xgb.cv works for ranking", {
  # Pairwise-ranking cross-validation on iris, using three query groups of
  # 50 rows each; the result must report one entry per requested fold.
  data(iris)
  relevance <- as.integer(iris$Petal.Width)
  features <- iris[, -(4:5)]
  group_sizes <- rep(50, 3)
  ranking_data <- xgb.DMatrix(features, label = relevance, group = group_sizes)

  ranking_params <- list(
    objective = "rank:pairwise",
    max_depth = 3
  )
  cv_result <- xgb.cv(
    data = ranking_data,
    params = ranking_params,
    nrounds = 3,
    nfold = 2,
    verbose = FALSE,
    stratified = FALSE
  )

  expect_length(cv_result$folds, 2L)
})
|
||||
|
||||
@@ -367,7 +367,7 @@ test_that("prediction in early-stopping xgb.cv works", {
|
||||
expect_output(
|
||||
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.1, nrounds = 20,
|
||||
early_stopping_rounds = 5, maximize = FALSE, stratified = FALSE,
|
||||
prediction = TRUE, base_score = 0.5)
|
||||
prediction = TRUE, base_score = 0.5, verbose = TRUE)
|
||||
, "Stopping. Best iteration")
|
||||
|
||||
expect_false(is.null(cv$early_stop$best_iteration))
|
||||
@@ -387,7 +387,7 @@ test_that("prediction in xgb.cv for softprob works", {
|
||||
lb <- as.numeric(iris$Species) - 1
|
||||
set.seed(11)
|
||||
expect_warning(
|
||||
cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4,
|
||||
cv <- xgb.cv(data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), nfold = 4,
|
||||
eta = 0.5, nrounds = 5, max_depth = 3, nthread = n_threads,
|
||||
subsample = 0.8, gamma = 2, verbose = 0,
|
||||
prediction = TRUE, objective = "multi:softprob", num_class = 3)
|
||||
|
||||
@@ -243,7 +243,7 @@ test_that("xgb.DMatrix: print", {
|
||||
txt <- capture.output({
|
||||
print(dtrain)
|
||||
})
|
||||
expect_equal(txt, "xgb.DMatrix dim: 6513 x 126 info: label weight base_margin colnames: yes")
|
||||
expect_equal(txt, "xgb.DMatrix dim: 6513 x 126 info: base_margin, label, weight colnames: yes")
|
||||
|
||||
# DMatrix with just features
|
||||
dtrain <- xgb.DMatrix(
|
||||
@@ -724,6 +724,44 @@ test_that("xgb.DMatrix: quantile cuts look correct", {
|
||||
)
|
||||
})
|
||||
|
||||
test_that("xgb.DMatrix: slicing keeps field indicators", {
  # Build a DMatrix that carries only lower/upper label bounds (no plain
  # label), slice its first five rows, and check that the slice reports the
  # same set of fields with the matching values.
  data(mtcars)
  bound <- mtcars[, 1]
  features <- as.matrix(mtcars[, -1])
  full_dm <- xgb.DMatrix(
    data = features,
    label_lower_bound = -bound,
    label_upper_bound = bound,
    nthread = 1
  )

  rows <- seq_len(5)
  sliced <- xgb.slice.DMatrix(full_dm, rows)

  # Both bound fields must be flagged as present on the slice ...
  for (field in c("label_lower_bound", "label_upper_bound")) {
    expect_true(xgb.DMatrix.hasinfo(sliced, field))
  }
  # ... while the label, which was never supplied, must not be.
  expect_false(xgb.DMatrix.hasinfo(sliced, "label"))

  expect_equal(
    getinfo(sliced, "label_lower_bound"),
    -bound[rows],
    tolerance = 1e-6
  )
  expect_equal(
    getinfo(sliced, "label_upper_bound"),
    bound[rows],
    tolerance = 1e-6
  )
})
|
||||
|
||||
test_that("xgb.DMatrix: can slice with groups", {
  # Slice a grouped DMatrix with `allow_groups = TRUE` and check that the
  # slice keeps its labels but no longer reports group / qid information.
  data(iris)
  features <- as.matrix(iris[, -5])
  set.seed(123)
  labels <- sample(3, size = nrow(features), replace = TRUE)
  group_sizes <- rep(50, 3)
  grouped_dm <- xgb.DMatrix(
    features,
    label = labels,
    group = group_sizes,
    nthread = 1
  )

  rows <- seq_len(50)
  sliced <- xgb.slice.DMatrix(grouped_dm, rows, allow_groups = TRUE)

  expect_true(xgb.DMatrix.hasinfo(sliced, "label"))
  for (field in c("group", "qid")) {
    expect_false(xgb.DMatrix.hasinfo(sliced, field))
  }
  expect_null(getinfo(sliced, "group"))
  expect_equal(getinfo(sliced, "label"), labels[rows], tolerance = 1e-6)
})
|
||||
|
||||
test_that("xgb.DMatrix: can read CSV", {
|
||||
txt <- paste(
|
||||
"1,2,3",
|
||||
|
||||
Reference in New Issue
Block a user