[R] Remove parameters and attributes related to ntree and rebase iterationrange (#9935)
This commit is contained in:
@@ -33,15 +33,11 @@ test_that("train and predict binary classification", {
|
||||
pred <- predict(bst, test$data)
|
||||
expect_length(pred, 1611)
|
||||
|
||||
pred1 <- predict(bst, train$data, ntreelimit = 1)
|
||||
pred1 <- predict(bst, train$data, iterationrange = c(1, 1))
|
||||
expect_length(pred1, 6513)
|
||||
err_pred1 <- sum((pred1 > 0.5) != train$label) / length(train$label)
|
||||
err_log <- attributes(bst)$evaluation_log[1, train_error]
|
||||
expect_lt(abs(err_pred1 - err_log), 10e-6)
|
||||
|
||||
pred2 <- predict(bst, train$data, iterationrange = c(1, 2))
|
||||
expect_length(pred1, 6513)
|
||||
expect_equal(pred1, pred2)
|
||||
})
|
||||
|
||||
test_that("parameter validation works", {
|
||||
@@ -117,8 +113,8 @@ test_that("dart prediction works", {
|
||||
nrounds = nrounds,
|
||||
objective = "reg:squarederror"
|
||||
)
|
||||
pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, ntreelimit = 0)
|
||||
pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, ntreelimit = nrounds)
|
||||
pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, iterationrange = NULL)
|
||||
pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, iterationrange = c(1, nrounds))
|
||||
expect_true(all(matrix(pred_by_xgboost_0, byrow = TRUE) == matrix(pred_by_xgboost_1, byrow = TRUE)))
|
||||
|
||||
pred_by_xgboost_2 <- predict(booster_by_xgboost, newdata = d, training = TRUE)
|
||||
@@ -139,8 +135,8 @@ test_that("dart prediction works", {
|
||||
data = dtrain,
|
||||
nrounds = nrounds
|
||||
)
|
||||
pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, ntreelimit = 0)
|
||||
pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, ntreelimit = nrounds)
|
||||
pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, iterationrange = NULL)
|
||||
pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, iterationrange = c(1, nrounds))
|
||||
pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)
|
||||
|
||||
expect_true(all(matrix(pred_by_train_0, byrow = TRUE) == matrix(pred_by_xgboost_0, byrow = TRUE)))
|
||||
@@ -162,7 +158,7 @@ test_that("train and predict softprob", {
|
||||
)
|
||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||
expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 5)
|
||||
pred <- predict(bst, as.matrix(iris[, -5]))
|
||||
expect_length(pred, nrow(iris) * 3)
|
||||
# row sums add up to total probability of 1:
|
||||
@@ -174,12 +170,12 @@ test_that("train and predict softprob", {
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[5, train_merror], err, tolerance = 5e-6)
|
||||
# manually calculate error at the 1st iteration:
|
||||
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 1)
|
||||
mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
|
||||
pred_labels <- max.col(mpred) - 1
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[1, train_merror], err, tolerance = 5e-6)
|
||||
|
||||
mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 2))
|
||||
mpred1 <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 1))
|
||||
expect_equal(mpred, mpred1)
|
||||
|
||||
d <- cbind(
|
||||
@@ -213,7 +209,7 @@ test_that("train and predict softmax", {
|
||||
)
|
||||
expect_false(is.null(attributes(bst)$evaluation_log))
|
||||
expect_lt(attributes(bst)$evaluation_log[, min(train_merror)], 0.025)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst) * 3, xgb.ntree(bst))
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 5)
|
||||
|
||||
pred <- predict(bst, as.matrix(iris[, -5]))
|
||||
expect_length(pred, nrow(iris))
|
||||
@@ -233,19 +229,15 @@ test_that("train and predict RF", {
|
||||
watchlist = list(train = xgb.DMatrix(train$data, label = lb))
|
||||
)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 1)
|
||||
expect_equal(xgb.ntree(bst), 20)
|
||||
|
||||
pred <- predict(bst, train$data)
|
||||
pred_err <- sum((pred > 0.5) != lb) / length(lb)
|
||||
expect_lt(abs(attributes(bst)$evaluation_log[1, train_error] - pred_err), 10e-6)
|
||||
# expect_lt(pred_err, 0.03)
|
||||
|
||||
pred <- predict(bst, train$data, ntreelimit = 20)
|
||||
pred <- predict(bst, train$data, iterationrange = c(1, 1))
|
||||
pred_err_20 <- sum((pred > 0.5) != lb) / length(lb)
|
||||
expect_equal(pred_err_20, pred_err)
|
||||
|
||||
pred1 <- predict(bst, train$data, iterationrange = c(1, 2))
|
||||
expect_equal(pred, pred1)
|
||||
})
|
||||
|
||||
test_that("train and predict RF with softprob", {
|
||||
@@ -261,7 +253,6 @@ test_that("train and predict RF with softprob", {
|
||||
watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
|
||||
)
|
||||
expect_equal(xgb.get.num.boosted.rounds(bst), 15)
|
||||
expect_equal(xgb.ntree(bst), 15 * 3 * 4)
|
||||
# predict for all iterations:
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
|
||||
expect_equal(dim(pred), c(nrow(iris), 3))
|
||||
@@ -269,7 +260,7 @@ test_that("train and predict RF with softprob", {
|
||||
err <- sum(pred_labels != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
|
||||
# predict for 7 iterations and adjust for 4 parallel trees per iteration
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 7 * 4)
|
||||
pred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, iterationrange = c(1, 7))
|
||||
err <- sum((max.col(pred) - 1) != lb) / length(lb)
|
||||
expect_equal(attributes(bst)$evaluation_log[7, train_merror], err, tolerance = 5e-6)
|
||||
})
|
||||
|
||||
@@ -211,12 +211,11 @@ test_that("early stopping xgb.train works", {
|
||||
, "Stopping. Best iteration")
|
||||
expect_false(is.null(xgb.attr(bst, "best_iteration")))
|
||||
expect_lt(xgb.attr(bst, "best_iteration"), 19)
|
||||
expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
|
||||
|
||||
pred <- predict(bst, dtest)
|
||||
expect_equal(length(pred), 1611)
|
||||
err_pred <- err(ltest, pred)
|
||||
err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_error]
|
||||
err_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, test_error]
|
||||
expect_equal(err_log, err_pred, tolerance = 5e-6)
|
||||
|
||||
set.seed(11)
|
||||
@@ -231,8 +230,7 @@ test_that("early stopping xgb.train works", {
|
||||
loaded <- xgb.load(fname)
|
||||
|
||||
expect_false(is.null(xgb.attr(loaded, "best_iteration")))
|
||||
expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
|
||||
expect_equal(xgb.attr(loaded, "best_ntreelimit"), xgb.attr(bst, "best_ntreelimit"))
|
||||
expect_equal(xgb.attr(loaded, "best_iteration"), xgb.attr(bst, "best_iteration"))
|
||||
})
|
||||
|
||||
test_that("early stopping using a specific metric works", {
|
||||
@@ -245,12 +243,11 @@ test_that("early stopping using a specific metric works", {
|
||||
, "Stopping. Best iteration")
|
||||
expect_false(is.null(xgb.attr(bst, "best_iteration")))
|
||||
expect_lt(xgb.attr(bst, "best_iteration"), 19)
|
||||
expect_equal(xgb.attr(bst, "best_iteration"), xgb.attr(bst, "best_ntreelimit"))
|
||||
|
||||
pred <- predict(bst, dtest, ntreelimit = xgb.attr(bst, "best_ntreelimit"))
|
||||
pred <- predict(bst, dtest, iterationrange = c(1, xgb.attr(bst, "best_iteration") + 1))
|
||||
expect_equal(length(pred), 1611)
|
||||
logloss_pred <- sum(-ltest * log(pred) - (1 - ltest) * log(1 - pred)) / length(ltest)
|
||||
logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration"), test_logloss]
|
||||
logloss_log <- attributes(bst)$evaluation_log[xgb.attr(bst, "best_iteration") + 1, test_logloss]
|
||||
expect_equal(logloss_log, logloss_pred, tolerance = 1e-5)
|
||||
})
|
||||
|
||||
@@ -286,7 +283,6 @@ test_that("early stopping xgb.cv works", {
|
||||
, "Stopping. Best iteration")
|
||||
expect_false(is.null(cv$best_iteration))
|
||||
expect_lt(cv$best_iteration, 19)
|
||||
expect_equal(cv$best_iteration, cv$best_ntreelimit)
|
||||
# the best error is min error:
|
||||
expect_true(cv$evaluation_log[, test_error_mean[cv$best_iteration] == min(test_error_mean)])
|
||||
})
|
||||
@@ -354,3 +350,44 @@ test_that("prediction in xgb.cv for softprob works", {
|
||||
expect_equal(dim(cv$pred), c(nrow(iris), 3))
|
||||
expect_lt(diff(range(rowSums(cv$pred))), 1e-6)
|
||||
})
|
||||
|
||||
test_that("prediction in xgb.cv works for multi-quantile", {
|
||||
data(mtcars)
|
||||
y <- mtcars$mpg
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
dm <- xgb.DMatrix(x, label = y, nthread = 1)
|
||||
cv <- xgb.cv(
|
||||
data = dm,
|
||||
params = list(
|
||||
objective = "reg:quantileerror",
|
||||
quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9),
|
||||
nthread = 1
|
||||
),
|
||||
nrounds = 5,
|
||||
nfold = 3,
|
||||
prediction = TRUE,
|
||||
verbose = 0
|
||||
)
|
||||
expect_equal(dim(cv$pred), c(nrow(x), 5))
|
||||
})
|
||||
|
||||
test_that("prediction in xgb.cv works for multi-output", {
|
||||
data(mtcars)
|
||||
y <- mtcars$mpg
|
||||
x <- as.matrix(mtcars[, -1])
|
||||
dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1)
|
||||
cv <- xgb.cv(
|
||||
data = dm,
|
||||
params = list(
|
||||
tree_method = "hist",
|
||||
multi_strategy = "multi_output_tree",
|
||||
objective = "reg:squarederror",
|
||||
nthread = n_threads
|
||||
),
|
||||
nrounds = 5,
|
||||
nfold = 3,
|
||||
prediction = TRUE,
|
||||
verbose = 0
|
||||
)
|
||||
expect_equal(dim(cv$pred), c(nrow(x), 2))
|
||||
})
|
||||
|
||||
@@ -72,10 +72,10 @@ test_that("gblinear early stopping works", {
|
||||
booster <- xgb.train(
|
||||
param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
|
||||
)
|
||||
expect_equal(xgb.attr(booster, "best_iteration"), 5)
|
||||
expect_equal(xgb.attr(booster, "best_iteration"), 4)
|
||||
predt_es <- predict(booster, dtrain)
|
||||
|
||||
n <- xgb.attr(booster, "best_iteration") + es_round
|
||||
n <- xgb.attr(booster, "best_iteration") + es_round + 1
|
||||
booster <- xgb.train(
|
||||
param, dtrain, n, list(eval = dtest, train = dtrain), early_stopping_rounds = es_round
|
||||
)
|
||||
|
||||
@@ -44,7 +44,7 @@ test_that('Test ranking with weighted data', {
|
||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
|
||||
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
|
||||
for (i in 1:10) {
|
||||
pred <- predict(bst, newdata = dtrain, ntreelimit = i)
|
||||
pred <- predict(bst, newdata = dtrain, iterationrange = c(1, i))
|
||||
# is_sorted[i]: is i-th group correctly sorted by the ranking predictor?
|
||||
is_sorted <- lapply(seq(1, 20, by = 5),
|
||||
function(k) {
|
||||
|
||||
Reference in New Issue
Block a user