[R] remove 'reshape' argument, let shapes be handled by core cpp library (#10330)
This commit is contained in:
@@ -162,20 +162,20 @@ test_that("train and predict softprob", {
|
||||
pred <- predict(bst, as.matrix(iris[, -5]))
|
||||
expect_length(pred, nrow(iris) * 3)
|
||||
# row sums add up to total probability of 1:
|
||||
expect_equal(rowSums(matrix(pred, ncol = 3, byrow = TRUE)), rep(1, nrow(iris)), tolerance = 1e-7)
|
||||
expect_equal(rowSums(pred), rep(1, nrow(iris)), tolerance = 1e-7)
|
||||
# manually calculate error at the last iteration:
|
||||
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
|
||||
expect_equal(as.numeric(t(mpred)), pred)
|
||||
mpred <- predict(bst, as.matrix(iris[, -5]))
|
||||
expect_equal(mpred, pred)
|
||||
pred_labels <- max.col(mpred) - 1
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6)
|
||||
# manually calculate error at the 1st iteration:
|
||||
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
|
||||
mpred <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 1))
|
||||
pred_labels <- max.col(mpred) - 1
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[1, train_merror], err, tolerance = 5e-6)
|
||||
|
||||
mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
|
||||
mpred1 <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 1))
|
||||
expect_equal(mpred, mpred1)
|
||||
|
||||
d <- cbind(
|
||||
@@ -190,7 +190,7 @@ test_that("train and predict softprob", {
|
||||
data = dtrain, nrounds = 4, num_class = 10,
|
||||
objective = "multi:softprob"
|
||||
)
|
||||
predt <- predict(booster, as.matrix(d), reshape = TRUE, strict_shape = FALSE)
|
||||
predt <- predict(booster, as.matrix(d), strict_shape = FALSE)
|
||||
expect_equal(ncol(predt), 10)
|
||||
expect_equal(rowSums(predt), rep(1, 100), tolerance = 1e-7)
|
||||
})
|
||||
@@ -254,13 +254,13 @@ test_that("train and predict RF with softprob", {
|
||||
)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 15)
|
||||
# predict for all iterations:
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
|
||||
pred <- predict(bst, as.matrix(iris[, -5]))
|
||||
expect_equal(dim(pred), c(nrow(iris), 3))
|
||||
pred_labels <- max.col(pred) - 1
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
|
||||
# predict for 7 iterations and adjust for 4 parallel trees per iteration
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 7))
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), iterationrange = c(1, 7))
|
||||
err <- sum((max.col(pred) - 1) != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[7, train_merror], err, tolerance = 5e-6)
|
||||
})
|
||||
@@ -485,15 +485,25 @@ test_that("strict_shape works", {
|
||||
n_rows <- nrow(X)
|
||||
n_cols <- ncol(X)
|
||||
|
||||
expect_equal(dim(predt), c(n_groups, n_rows))
|
||||
expect_equal(dim(margin), c(n_groups, n_rows))
|
||||
expect_equal(dim(contri), c(n_cols + 1, n_groups, n_rows))
|
||||
expect_equal(dim(interact), c(n_cols + 1, n_cols + 1, n_groups, n_rows))
|
||||
expect_equal(dim(leaf), c(1, n_groups, n_rounds, n_rows))
|
||||
expect_equal(dim(predt), c(n_rows, n_groups))
|
||||
expect_equal(dim(margin), c(n_rows, n_groups))
|
||||
expect_equal(dim(contri), c(n_rows, n_groups, n_cols + 1))
|
||||
expect_equal(dim(interact), c(n_rows, n_groups, n_cols + 1, n_cols + 1))
|
||||
expect_equal(dim(leaf), c(n_rows, n_rounds, n_groups, 1))
|
||||
|
||||
if (n_groups != 1) {
|
||||
for (g in seq_len(n_groups)) {
|
||||
expect_lt(max(abs(colSums(contri[, g, ]) - margin[g, ])), 1e-5)
|
||||
expect_lt(max(abs(rowSums(contri[, g, ]) - margin[, g])), 1e-5)
|
||||
}
|
||||
|
||||
leaf_no_strict <- predict(bst, X, strict_shape = FALSE, predleaf = TRUE)
|
||||
for (g in seq_len(n_groups)) {
|
||||
g_mask <- rep(FALSE, n_groups)
|
||||
g_mask[g] <- TRUE
|
||||
expect_equal(
|
||||
leaf[, , g, 1L],
|
||||
leaf_no_strict[, g_mask]
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -562,7 +572,7 @@ test_that("Quantile regression accepts multiple quantiles", {
|
||||
),
|
||||
nrounds = 15
|
||||
)
|
||||
pred <- predict(model, x, reshape = TRUE)
|
||||
pred <- predict(model, x)
|
||||
|
||||
expect_equal(dim(pred)[1], nrow(x))
|
||||
expect_equal(dim(pred)[2], 3)
|
||||
@@ -590,7 +600,7 @@ test_that("Can use multi-output labels with built-in objectives", {
|
||||
data = dm,
|
||||
nrounds = 5
|
||||
)
|
||||
pred <- predict(model, x, reshape = TRUE)
|
||||
pred <- predict(model, x)
|
||||
expect_equal(pred[, 1], -pred[, 2])
|
||||
expect_true(cor(y, pred[, 1]) > 0.9)
|
||||
expect_true(cor(y, pred[, 2]) < -0.9)
|
||||
@@ -619,7 +629,7 @@ test_that("Can use multi-output labels with custom objectives", {
|
||||
data = dm,
|
||||
nrounds = 5
|
||||
)
|
||||
pred <- predict(model, x, reshape = TRUE)
|
||||
pred <- predict(model, x)
|
||||
expect_equal(pred[, 1], -pred[, 2])
|
||||
expect_true(cor(y, pred[, 1]) > 0.9)
|
||||
expect_true(cor(y, pred[, 2]) < -0.9)
|
||||
@@ -666,8 +676,8 @@ test_that("Can predict on data.frame objects", {
|
||||
nrounds = 5
|
||||
)
|
||||
|
||||
pred_mat <- predict(model, xgb.DMatrix(x_mat), nthread = n_threads)
|
||||
pred_df <- predict(model, x_df, nthread = n_threads)
|
||||
pred_mat <- predict(model, xgb.DMatrix(x_mat))
|
||||
pred_df <- predict(model, x_df)
|
||||
expect_equal(pred_mat, pred_df)
|
||||
})
|
||||
|
||||
@@ -737,7 +747,7 @@ test_that("Coefficients from gblinear have the expected shape and names", {
|
||||
expect_equal(nrow(coefs), ncol(x) + 1)
|
||||
expect_equal(ncol(coefs), 3)
|
||||
expect_equal(row.names(coefs), c("(Intercept)", colnames(x)))
|
||||
pred_auto <- predict(model, x, outputmargin = TRUE, reshape = TRUE)
|
||||
pred_auto <- predict(model, x, outputmargin = TRUE)
|
||||
pred_manual <- unname(mm %*% coefs)
|
||||
expect_equal(pred_manual, pred_auto, tolerance = 1e-7)
|
||||
})
|
||||
|
||||
@@ -9,7 +9,7 @@ model <- xgb.train(
|
||||
data = dm,
|
||||
nrounds = 20
|
||||
)
|
||||
pred <- predict(model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
pred <- predict(model, dm, predleaf = TRUE)
|
||||
|
||||
test_that("Slicing full model", {
|
||||
new_model <- xgb.slice.Booster(model, 1, 0)
|
||||
@@ -24,32 +24,32 @@ test_that("Slicing full model", {
|
||||
|
||||
test_that("Slicing sequence from start", {
|
||||
new_model <- xgb.slice.Booster(model, 1, 10)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(1, 10)])
|
||||
|
||||
new_model <- model[1:10]
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(1, 10)])
|
||||
})
|
||||
|
||||
test_that("Slicing sequence from middle", {
|
||||
new_model <- xgb.slice.Booster(model, 5, 10)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(5, 10)])
|
||||
|
||||
new_model <- model[5:10]
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(5, 10)])
|
||||
})
|
||||
|
||||
test_that("Slicing with non-unit step", {
|
||||
for (s in 2:5) {
|
||||
new_model <- xgb.slice.Booster(model, 1, 17, s)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(1, 17, s)])
|
||||
|
||||
new_model <- model[seq(1, 17, s)]
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(1, 17, s)])
|
||||
}
|
||||
})
|
||||
@@ -57,11 +57,11 @@ test_that("Slicing with non-unit step", {
|
||||
test_that("Slicing with non-unit step from middle", {
|
||||
for (s in 2:5) {
|
||||
new_model <- xgb.slice.Booster(model, 4, 17, s)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(4, 17, s)])
|
||||
|
||||
new_model <- model[seq(4, 17, s)]
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE, reshape = TRUE)
|
||||
new_pred <- predict(new_model, dm, predleaf = TRUE)
|
||||
expect_equal(new_pred, pred[, seq(4, 17, s)])
|
||||
}
|
||||
})
|
||||
|
||||
@@ -400,12 +400,10 @@ test_that("xgb.DMatrix: can take multi-dimensional 'base_margin'", {
|
||||
),
|
||||
nround = 1
|
||||
)
|
||||
pred_only_x <- predict(model, x, nthread = n_threads, reshape = TRUE)
|
||||
pred_only_x <- predict(model, x)
|
||||
pred_w_base <- predict(
|
||||
model,
|
||||
xgb.DMatrix(data = x, base_margin = b, nthread = n_threads),
|
||||
nthread = n_threads,
|
||||
reshape = TRUE
|
||||
xgb.DMatrix(data = x, base_margin = b)
|
||||
)
|
||||
expect_equal(pred_only_x, pred_w_base - b, tolerance = 1e-5)
|
||||
})
|
||||
|
||||
@@ -132,31 +132,31 @@ test_that("predict feature contributions works", {
|
||||
tolerance = float_tolerance)
|
||||
|
||||
# gbtree multiclass
|
||||
pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
|
||||
pred <- predict(mbst.Tree, as.matrix(iris[, -5]), outputmargin = TRUE)
|
||||
pred_contr <- predict(mbst.Tree, as.matrix(iris[, -5]), predcontrib = TRUE)
|
||||
expect_is(pred_contr, "list")
|
||||
expect_length(pred_contr, 3)
|
||||
for (g in seq_along(pred_contr)) {
|
||||
expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "(Intercept)"))
|
||||
expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), 1e-5)
|
||||
expect_is(pred_contr, "array")
|
||||
expect_length(dim(pred_contr), 3)
|
||||
for (g in seq_len(dim(pred_contr)[2])) {
|
||||
expect_equal(colnames(pred_contr[, g, ]), c(colnames(iris[, -5]), "(Intercept)"))
|
||||
expect_lt(max(abs(rowSums(pred_contr[, g, ]) - pred[, g])), 1e-5)
|
||||
}
|
||||
|
||||
# gblinear multiclass (set base_score = 0, which is base margin in multiclass)
|
||||
pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE, reshape = TRUE)
|
||||
pred <- predict(mbst.GLM, as.matrix(iris[, -5]), outputmargin = TRUE)
|
||||
pred_contr <- predict(mbst.GLM, as.matrix(iris[, -5]), predcontrib = TRUE)
|
||||
expect_length(pred_contr, 3)
|
||||
expect_length(dim(pred_contr), 3)
|
||||
coefs_all <- matrix(
|
||||
data = as.numeric(xgb.dump(mbst.GLM)[-c(1, 2, 6)]),
|
||||
ncol = 3,
|
||||
byrow = TRUE
|
||||
)
|
||||
for (g in seq_along(pred_contr)) {
|
||||
expect_equal(colnames(pred_contr[[g]]), c(colnames(iris[, -5]), "(Intercept)"))
|
||||
expect_lt(max(abs(rowSums(pred_contr[[g]]) - pred[, g])), float_tolerance)
|
||||
for (g in seq_along(dim(pred_contr)[2])) {
|
||||
expect_equal(colnames(pred_contr[, g, ]), c(colnames(iris[, -5]), "(Intercept)"))
|
||||
expect_lt(max(abs(rowSums(pred_contr[, g, ]) - pred[, g])), float_tolerance)
|
||||
# manual calculation of linear terms
|
||||
coefs <- c(coefs_all[-1, g], coefs_all[1, g]) # intercept needs to be the last
|
||||
pred_contr_manual <- sweep(as.matrix(cbind(iris[, -5], 1)), 2, coefs, FUN = "*")
|
||||
expect_equal(as.numeric(pred_contr[[g]]), as.numeric(pred_contr_manual),
|
||||
expect_equal(as.numeric(pred_contr[, g, ]), as.numeric(pred_contr_manual),
|
||||
tolerance = float_tolerance)
|
||||
}
|
||||
})
|
||||
|
||||
@@ -127,41 +127,23 @@ test_that("multiclass feature interactions work", {
|
||||
eta = 0.1, max_depth = 4, objective = 'multi:softprob', num_class = 3, nthread = n_threads
|
||||
)
|
||||
b <- xgb.train(param, dm, 40)
|
||||
pred <- t(
|
||||
array(
|
||||
data = predict(b, dm, outputmargin = TRUE),
|
||||
dim = c(3, 150)
|
||||
)
|
||||
)
|
||||
pred <- predict(b, dm, outputmargin = TRUE)
|
||||
|
||||
# SHAP contributions:
|
||||
cont <- predict(b, dm, predcontrib = TRUE)
|
||||
expect_length(cont, 3)
|
||||
# rewrap them as a 3d array
|
||||
cont <- array(
|
||||
data = unlist(cont),
|
||||
dim = c(150, 5, 3)
|
||||
)
|
||||
expect_length(dim(cont), 3)
|
||||
|
||||
# make sure for each row they add up to marginal predictions
|
||||
expect_lt(max(abs(apply(cont, c(1, 3), sum) - pred)), 0.001)
|
||||
expect_lt(max(abs(apply(cont, c(1, 2), sum) - pred)), 0.001)
|
||||
|
||||
# SHAP interaction contributions:
|
||||
intr <- predict(b, dm, predinteraction = TRUE)
|
||||
expect_length(intr, 3)
|
||||
# rewrap them as a 4d array
|
||||
intr <- aperm(
|
||||
a = array(
|
||||
data = unlist(intr),
|
||||
dim = c(150, 5, 5, 3)
|
||||
),
|
||||
perm = c(4, 1, 2, 3) # [grp, row, col, col]
|
||||
)
|
||||
expect_length(dim(intr), 4)
|
||||
|
||||
# check the symmetry
|
||||
expect_lt(max(abs(aperm(intr, c(1, 2, 4, 3)) - intr)), 0.00001)
|
||||
# sums WRT columns must be close to feature contributions
|
||||
expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - aperm(cont, c(3, 1, 2)))), 0.00001)
|
||||
expect_lt(max(abs(apply(intr, c(1, 2, 3), sum) - cont)), 0.00001)
|
||||
})
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user