diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index d1c186975..03e77c1df 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -9,69 +9,136 @@ test <- agaricus.test
 set.seed(1994)
 
 test_that("train and predict binary classification", {
-  nround = 2
+  nrounds = 2
   expect_output(
-    bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-                   eta = 1, nthread = 2, nround = nround, objective = "binary:logistic")
+    bst <- xgboost(data = train$data, label = train$label, max_depth = 2,
+                   eta = 1, nthread = 2, nrounds = nrounds, objective = "binary:logistic")
   , "train-error")
   expect_equal(class(bst), "xgb.Booster")
-
-  expect_true(!is.null(bst$evaluation_log))
-  expect_equal(nrow(bst$evaluation_log), nround)
+  expect_equal(bst$niter, nrounds)
+  expect_false(is.null(bst$evaluation_log))
+  expect_equal(nrow(bst$evaluation_log), nrounds)
   expect_lt(bst$evaluation_log[, min(train_error)], 0.03)
-  expect_equal(bst$nboost, bst$ntree)
-
+
   pred <- predict(bst, test$data)
-  expect_equal(length(pred), 1611)
+  expect_length(pred, 1611)
+
+  pred1 <- predict(bst, train$data, ntreelimit = 1)
+  expect_length(pred1, 6513)
+  err_pred1 <- sum((pred1 > 0.5) != train$label)/length(train$label)
+  err_log <- bst$evaluation_log[1, train_error]
+  expect_lt(abs(err_pred1 - err_log), 10e-6)
 })
 
test_that("train and predict softprob", {
+  lb <- as.numeric(iris$Species) - 1
+  set.seed(11)
   expect_output(
-    bst <- xgboost(data = as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1,
-                   max.depth = 3, eta = 0.5, nthread = 2, nround = 5,
+    bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
+                   max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
                    objective = "multi:softprob", num_class=3)
   , "train-merror")
-  expect_true(!is.null(bst$evaluation_log))
+  expect_false(is.null(bst$evaluation_log))
   expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
-  expect_equal(bst$nboost * 3, bst$ntree)
-
+  expect_equal(bst$niter * 3, xgb.ntree(bst))
   pred <- predict(bst, as.matrix(iris[, -5]))
-  expect_equal(length(pred), nrow(iris) * 3)
+  expect_length(pred, nrow(iris) * 3)
+  # row sums add up to total probability of 1:
+  expect_equal(rowSums(matrix(pred, ncol=3, byrow=TRUE)), rep(1, nrow(iris)), tolerance = 1e-7)
+  # manually calculate error at the last iteration:
+  mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE)
+  expect_equal(as.numeric(t(mpred)), pred)
+  pred_labels <- max.col(mpred) - 1
+  err <- sum(pred_labels != lb)/length(lb)
+  expect_equal(bst$evaluation_log[5, train_merror], err, tolerance = 5e-6)
+  # manually calculate error at the 1st iteration:
+  mpred <- predict(bst, as.matrix(iris[, -5]), reshape = TRUE, ntreelimit = 1)
+  pred_labels <- max.col(mpred) - 1
+  err <- sum(pred_labels != lb)/length(lb)
+  expect_equal(bst$evaluation_log[1, train_merror], err, tolerance = 5e-6)
 })
 
 test_that("train and predict softmax", {
+  lb <- as.numeric(iris$Species) - 1
+  set.seed(11)
   expect_output(
-    bst <- xgboost(data = as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1,
-                   max.depth = 3, eta = 0.15, nthread = 2, nround = 25,
+    bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
+                   max_depth = 3, eta = 0.5, nthread = 2, nrounds = 5,
                    objective = "multi:softmax", num_class=3)
   , "train-merror")
-  expect_true(!is.null(bst$evaluation_log))
+  expect_false(is.null(bst$evaluation_log))
   expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
-  expect_equal(bst$nboost * 3, bst$ntree)
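+  # one tree is grown per class at each boosting iteration,
+  # so the booster holds niter * num_class trees in total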
+  expect_equal(bst$niter * 3, xgb.ntree(bst))
 
   pred <- predict(bst, as.matrix(iris[, -5]))
-  expect_equal(length(pred), nrow(iris))
+  expect_length(pred, nrow(iris))
+  err <- sum(pred != lb)/length(lb)
+  expect_equal(bst$evaluation_log[5, train_merror], err, tolerance = 5e-6)
 })
 
-test_that("early stopping xgb.train works", {
+test_that("train and predict RF", {
+  set.seed(11)
+  lb <- train$label
+  # single iteration
+  bst <- xgboost(data = train$data, label = lb, max_depth = 5,
+                 nthread = 2, nrounds = 1, objective = "binary:logistic",
+                 num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
+  expect_equal(bst$niter, 1)
+  expect_equal(xgb.ntree(bst), 20)
+
+  pred <- predict(bst, train$data)
+  pred_err <- sum((pred > 0.5) != lb)/length(lb)
+  expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
+  expect_lt(pred_err, 0.03)
+
+  pred <- predict(bst, train$data, ntreelimit = 20)
+  pred_err_20 <- sum((pred > 0.5) != lb)/length(lb)
+  expect_equal(pred_err_20, pred_err)
+
+  pred <- predict(bst, train$data, ntreelimit = 1)
+  pred_err_1 <- sum((pred > 0.5) != lb)/length(lb)
+  expect_lt(pred_err, pred_err_1)
+  expect_lt(pred_err, 0.08)
+})
+
+test_that("train and predict RF with softprob", {
+  lb <- as.numeric(iris$Species) - 1
+  nrounds <- 15
+  set.seed(11)
+  bst <- xgboost(data = as.matrix(iris[, -5]), label = lb,
+                 max_depth = 3, eta = 0.9, nthread = 2, nrounds = nrounds,
+                 objective = "multi:softprob", num_class=3,
+                 num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5)
+  expect_equal(bst$niter, 15)
+  expect_equal(xgb.ntree(bst), 15*3*4)
+  # predict for all iterations:
+  pred <- predict(bst, as.matrix(iris[, -5]), reshape=TRUE)
+  expect_equal(dim(pred), c(nrow(iris), 3))
+  pred_labels <- max.col(pred) - 1
+  err <- sum(pred_labels != lb)/length(lb)
+  expect_equal(bst$evaluation_log[nrounds, train_merror], err, tolerance = 5e-6)
+  # predict for 7 iterations and adjust for 4 parallel trees per iteration
+  pred <- predict(bst, as.matrix(iris[, -5]), reshape=TRUE, ntreelimit = 7 * 4)
+  err <- sum((max.col(pred) - 1) != lb)/length(lb)
+  expect_equal(bst$evaluation_log[7, train_merror], err, tolerance = 5e-6)
+})
+
+test_that("use of multiple eval metrics works", {
   expect_output(
-    bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-                   eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
-                   early.stop.round = 3, maximize = FALSE)
-  , "Stopping. Best iteration")
Best iteration") - expect_true(!is.null(bst$best_iteration)) - expect_lt(bst$best_iteration, 19) - expect_equal(bst$nboost, bst$ntree) - expect_equal(bst$best_iteration, bst$best_ntreelimit) - - pred <- predict(bst, test$data) - expect_equal(length(pred), 1611) + bst <- xgboost(data = train$data, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", + eval_metric = 'error', eval_metric = 'auc', eval_metric = "logloss") + , "train-error.*train-auc.*train-logloss") + expect_false(is.null(bst$evaluation_log)) + expect_equal(dim(bst$evaluation_log), c(2, 4)) + expect_equal(colnames(bst$evaluation_log), c("iter", "train_error", "train_auc", "train_logloss")) }) + test_that("training continuation works", { dtrain <- xgb.DMatrix(train$data, label = train$label) watchlist = list(train=dtrain) - param <- list(objective = "binary:logistic", max.depth = 2, eta = 1, nthread = 2) + param <- list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2) # for the reference, use 4 iterations at once: set.seed(11) @@ -82,7 +149,7 @@ test_that("training continuation works", { # continue for two more: bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = bst1) expect_equal(bst$raw, bst2$raw) - expect_true(!is.null(bst2$evaluation_log)) + expect_false(is.null(bst2$evaluation_log)) expect_equal(dim(bst2$evaluation_log), c(4, 2)) expect_equal(bst2$evaluation_log, bst$evaluation_log) # test continuing from raw model data @@ -98,62 +165,18 @@ test_that("training continuation works", { test_that("xgb.cv works", { - cv <- xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5, - eta = 1., nthread = 2, nround = 2, objective = "binary:logistic", + set.seed(11) + cv <- xgb.cv(data = train$data, label = train$label, max_depth = 2, nfold = 5, + eta = 1., nthread = 2, nrounds = 2, objective = "binary:logistic", verbose=TRUE) expect_is(cv, 'xgb.cv.synchronous') - expect_true(!is.null(cv$evaluation_log)) + expect_false(is.null(cv$evaluation_log)) expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03) expect_lt(cv$evaluation_log[, min(test_error_std)], 0.004) - expect_equal(cv$nboost, cv$ntree) - expect_true(!is.null(cv$folds) && is.list(cv$folds)) + expect_equal(cv$niter, 2) + expect_false(is.null(cv$folds) && is.list(cv$folds)) expect_length(cv$folds, 5) - expect_true(!is.null(cv$params) && is.list(cv$params)) - expect_true(!is.null(cv$callbacks)) - expect_true(!is.null(cv$call)) -}) - -test_that("early stopping xgb.cv works", { - expect_output( - cv <- xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5, - eta = 0.5, nthread = 2, nround = 20, objective = "binary:logistic", - early.stop.round = 3, maximize = FALSE, verbose=TRUE) - , "Stopping. 
Best iteration") - expect_true(!is.null(cv$best_iteration)) - expect_lt(cv$best_iteration, 19) - expect_equal(cv$nboost, cv$ntree) - expect_equal(cv$best_iteration, cv$best_ntreelimit) -}) - -test_that("prediction in xgb.cv works", { - nround = 2 - cv <- xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5, - eta = 0.5, nthread = 2, nround = nround, objective = "binary:logistic", - verbose=TRUE, prediction = T) - expect_true(!is.null(cv$evaluation_log)) - expect_true(!is.null(cv$pred)) - expect_length(cv$pred, nrow(train$data)) - err_pred <- sum((cv$pred > 0.5) != train$label)/length(train$label) - err_log <- cv$evaluation_log[nround, test_error_mean] - expect_lt(abs(err_pred - err_log), 10e-6) -}) - -test_that("prediction in early-stopping xgb.cv works", { - set.seed(123) - # add some label noise - lb <- train$label - lb[sample(length(train$label), 2000)] <- 0 - expect_output( - cv <- xgb.cv(data = train$data, label = lb, max.depth = 3, nfold = 5, - eta = 1., nthread = 2, nround = 20, objective = "binary:logistic", - early.stop.round = 3, maximize = FALSE, verbose=TRUE, predict=TRUE) - , "Stopping. Best iteration") - expect_true(!is.null(cv$best_iteration)) - expect_lt(cv$best_iteration, 19) - expect_true(!is.null(cv$evaluation_log)) - expect_true(!is.null(cv$pred)) - expect_length(cv$pred, nrow(train$data)) - err_pred <- sum((cv$pred > 0.5) != lb)/length(lb) - err_log <- cv$evaluation_log[cv$best_iteration, test_error_mean] - expect_lt(abs(err_pred - err_log), 10e-6) + expect_false(is.null(cv$params) && is.list(cv$params)) + expect_false(is.null(cv$callbacks)) + expect_false(is.null(cv$call)) }) diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index bf9239a85..003eb1069 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -6,26 +6,42 @@ require(data.table) context("callbacks") data(agaricus.train, package='xgboost') +data(agaricus.test, package='xgboost') train <- agaricus.train +test <- agaricus.test -dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label) -watchlist = list(train=dtrain) -param <- list(objective = "binary:logistic", max.depth = 2, nthread = 2) +# add some label noise for early stopping tests +add.noise <- function(label, frac) { + inoise <- sample(length(label), length(label) * frac) + label[inoise] <- !label[inoise] + label +} +set.seed(11) +ltrain <- add.noise(train$label, 0.2) +ltest <- add.noise(test$label, 0.2) +dtrain <- xgb.DMatrix(train$data, label = ltrain) +dtest <- xgb.DMatrix(test$data, label = ltest) +watchlist = list(train=dtrain, test=dtest) -test_that("cb.print_evaluation works as expected", { +err <- function(label, pr) sum((pr > 0.5) != label)/length(label) + +param <- list(objective = "binary:logistic", max_depth = 2, nthread = 2) + + +test_that("cb.print.evaluation works as expected", { bst_evaluation <- c('train-auc'=0.9, 'test-auc'=0.8) bst_evaluation_err <- NULL begin_iteration <- 1 end_iteration <- 7 - f0 <- cb.print_evaluation(period=0) - f1 <- cb.print_evaluation(period=1) - f5 <- cb.print_evaluation(period=5) + f0 <- cb.print.evaluation(period=0) + f1 <- cb.print.evaluation(period=1) + f5 <- cb.print.evaluation(period=5) - expect_true(!is.null(attr(f1, 'call'))) - expect_equal(attr(f1, 'name'), 'cb.print_evaluation') + expect_false(is.null(attr(f1, 'call'))) + expect_equal(attr(f1, 'name'), 'cb.print.evaluation') iteration <- 1 expect_silent(f0()) @@ -45,16 +61,16 @@ test_that("cb.print_evaluation works as 
expected", { expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\\+0.100000\ttest-auc:0.800000\\+0.200000") }) -test_that("cb.log_evaluation works as expected", { +test_that("cb.evaluation.log works as expected", { bst_evaluation <- c('train-auc'=0.9, 'test-auc'=0.8) bst_evaluation_err <- NULL evaluation_log <- list() - f <- cb.log_evaluation() + f <- cb.evaluation.log() - expect_true(!is.null(attr(f, 'call'))) - expect_equal(attr(f, 'name'), 'cb.log_evaluation') + expect_false(is.null(attr(f, 'call'))) + expect_equal(attr(f, 'name'), 'cb.evaluation.log') iteration <- 1 expect_silent(f()) @@ -70,7 +86,7 @@ test_that("cb.log_evaluation works as expected", { bst_evaluation_err <- c('train-auc'=0.1, 'test-auc'=0.2) evaluation_log <- list() - f <- cb.log_evaluation() + f <- cb.evaluation.log() iteration <- 1 expect_silent(f()) @@ -88,20 +104,23 @@ test_that("cb.log_evaluation works as expected", { test_auc_mean=c(0.8,0.8), test_auc_std=c(0.2,0.2))) }) -test_that("cb.reset_parameters works as expected", { + +param <- list(objective = "binary:logistic", max_depth = 4, nthread = 2) + +test_that("cb.reset.parameters works as expected", { # fixed eta set.seed(111) bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9) - expect_true(!is.null(bst0$evaluation_log)) - expect_true(!is.null(bst0$evaluation_log$train_error)) + expect_false(is.null(bst0$evaluation_log)) + expect_false(is.null(bst0$evaluation_log$train_error)) # same eta but re-set as a vector parameter in the callback set.seed(111) my_par <- list(eta = c(0.9, 0.9)) bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, - callbacks = list(cb.reset_parameters(my_par))) - expect_true(!is.null(bst1$evaluation_log$train_error)) + callbacks = list(cb.reset.parameters(my_par))) + expect_false(is.null(bst1$evaluation_log$train_error)) expect_equal(bst0$evaluation_log$train_error, bst1$evaluation_log$train_error) @@ -109,8 +128,8 @@ test_that("cb.reset_parameters works as expected", { set.seed(111) my_par <- list(eta = function(itr, itr_end) 0.9) bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, - callbacks = list(cb.reset_parameters(my_par))) - expect_true(!is.null(bst2$evaluation_log$train_error)) + callbacks = list(cb.reset.parameters(my_par))) + expect_false(is.null(bst2$evaluation_log$train_error)) expect_equal(bst0$evaluation_log$train_error, bst2$evaluation_log$train_error) @@ -118,44 +137,43 @@ test_that("cb.reset_parameters works as expected", { set.seed(111) my_par <- list(eta = c(0.6, 0.5)) bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, - callbacks = list(cb.reset_parameters(my_par))) - expect_true(!is.null(bst3$evaluation_log$train_error)) - expect_true(!all(bst0$evaluation_log$train_error == bst3$evaluation_log$train_error)) + callbacks = list(cb.reset.parameters(my_par))) + expect_false(is.null(bst3$evaluation_log$train_error)) + expect_false(all(bst0$evaluation_log$train_error == bst3$evaluation_log$train_error)) # resetting multiple parameters at the same time runs with no error - my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), alpha = c(0.01, 0.02)) + my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8)) expect_error( bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, - callbacks = list(cb.reset_parameters(my_par))) - , NA) + callbacks = list(cb.reset.parameters(my_par))) + , NA) # NA = no error # expect no learning with 0 learning rate my_par <- list(eta = c(0., 0.)) bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, - callbacks = list(cb.reset_parameters(my_par))) - 
+                    callbacks = list(cb.reset.parameters(my_par)))
+  expect_false(is.null(bstX$evaluation_log$train_error))
   er <- unique(bstX$evaluation_log$train_error)
   expect_length(er, 1)
   expect_gt(er, 0.4)
 })
 
-# Note: early stopping is tested in test_basic
-
-test_that("cb.save_model works as expected", {
+test_that("cb.save.model works as expected", {
   files <- c('xgboost_01.model', 'xgboost_02.model', 'xgboost.model')
   for (f in files) if (file.exists(f)) file.remove(f)
 
-  bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+  bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1,
                    save_period = 1, save_name = "xgboost_%02d.model")
   expect_true(file.exists('xgboost_01.model'))
   expect_true(file.exists('xgboost_02.model'))
   b1 <- xgb.load('xgboost_01.model')
-  expect_length(grep('^booster', xgb.dump(b1)), 1)
+  expect_equal(xgb.ntree(b1), 1)
   b2 <- xgb.load('xgboost_02.model')
+  expect_equal(xgb.ntree(b2), 2)
   expect_equal(bst$raw, b2$raw)
 
   # save_period = 0 saves the last iteration's model
-  bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, save_period = 0)
+  bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, save_period = 0)
   expect_true(file.exists('xgboost.model'))
   b2 <- xgb.load('xgboost.model')
   expect_equal(bst$raw, b2$raw)
@@ -165,10 +183,99 @@ test_that("cb.save_model works as expected", {
 
 test_that("can store evaluation_log without printing", {
   expect_silent(
-    bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1,
-                     verbose = 0, callbacks = list(cb.log_evaluation()))
+    bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1,
+                     verbose = 0, callbacks = list(cb.evaluation.log()))
   )
-  expect_true(!is.null(bst$evaluation_log))
-  expect_true(!is.null(bst$evaluation_log$train_error))
-  expect_lt(bst$evaluation_log[2, train_error], 0.03)
+  expect_false(is.null(bst$evaluation_log))
+  expect_false(is.null(bst$evaluation_log$train_error))
+  expect_lt(bst$evaluation_log[, min(train_error)], 0.2)
+})
+
+test_that("early stopping xgb.train works", {
+  set.seed(11)
+  expect_output(
+    bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
+                     early_stopping_rounds = 3, maximize = FALSE)
+  , "Stopping. Best iteration")
+  expect_false(is.null(bst$best_iteration))
+  expect_lt(bst$best_iteration, 19)
+  expect_equal(bst$best_iteration, bst$best_ntreelimit)
+
+  pred <- predict(bst, dtest)
+  expect_equal(length(pred), 1611)
+  err_pred <- err(ltest, pred)
+  err_log <- bst$evaluation_log[bst$best_iteration, test_error]
+  expect_equal(err_log, err_pred, tolerance = 5e-6)
+})
+
+test_that("early stopping using a specific metric works", {
+  set.seed(11)
+  expect_output(
+    bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.6,
+                     eval_metric="logloss", eval_metric="auc",
+                     callbacks = list(cb.early.stop(stopping_rounds = 3, maximize = FALSE,
+                                                    metric_name = 'test_logloss')))
+  , "Stopping. Best iteration")
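+  # metric_name tells cb.early.stop to track test_logloss for stopping;
+  # by default, the last metric in the evaluation log (auc here) would be used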
Best iteration") + expect_false(is.null(bst$best_iteration)) + expect_lt(bst$best_iteration, 19) + expect_equal(bst$best_iteration, bst$best_ntreelimit) + + pred <- predict(bst, dtest, ntreelimit = bst$best_ntreelimit) + expect_equal(length(pred), 1611) + logloss_pred <- sum(-ltest * log(pred) - (1 - ltest) * log(1 - pred)) / length(ltest) + logloss_log <- bst$evaluation_log[bst$best_iteration, test_logloss] + expect_equal(logloss_log, logloss_pred, tolerance = 5e-6) +}) + +test_that("early stopping xgb.cv works", { + set.seed(11) + expect_output( + cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.3, nrounds = 20, + early_stopping_rounds = 3, maximize = FALSE) + , "Stopping. Best iteration") + expect_false(is.null(cv$best_iteration)) + expect_lt(cv$best_iteration, 19) + expect_equal(cv$best_iteration, cv$best_ntreelimit) + # the best error is min error: + expect_true(cv$evaluation_log[, test_error_mean[cv$best_iteration] == min(test_error_mean)]) +}) + +test_that("prediction in xgb.cv works", { + set.seed(11) + nrounds = 4 + cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE) + expect_false(is.null(cv$evaluation_log)) + expect_false(is.null(cv$pred)) + expect_length(cv$pred, nrow(train$data)) + err_pred <- mean( sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))) ) + err_log <- cv$evaluation_log[nrounds, test_error_mean] + expect_equal(err_pred, err_log, tolerance = 1e-6) + + # save CV models + set.seed(11) + cvx <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, + callbacks = list(cb.cv.predict(save_models = TRUE))) + expect_equal(cv$evaluation_log, cvx$evaluation_log) + expect_length(cvx$models, 5) + expect_true(all(sapply(cvx$models, class) == 'xgb.Booster')) +}) + +test_that("prediction in early-stopping xgb.cv works", { + set.seed(1) + expect_output( + cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.1, nrounds = 20, + early_stopping_rounds = 5, maximize = FALSE, prediction=TRUE) + , "Stopping. 
Best iteration") + + expect_false(is.null(cv$best_iteration)) + expect_lt(cv$best_iteration, 19) + expect_false(is.null(cv$evaluation_log)) + expect_false(is.null(cv$pred)) + expect_length(cv$pred, nrow(train$data)) + + err_pred <- mean( sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))) ) + err_log <- cv$evaluation_log[cv$best_iteration, test_error_mean] + expect_equal(err_pred, err_log, tolerance = 1e-6) + err_log_last <- cv$evaluation_log[cv$niter, test_error_mean] + expect_gt(abs(err_pred - err_log_last), 1e-4) }) diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R index 47dc61e18..ba0595c95 100644 --- a/R-package/tests/testthat/test_custom_objective.R +++ b/R-package/tests/testthat/test_custom_objective.R @@ -24,22 +24,22 @@ evalerror <- function(preds, dtrain) { return(list(metric = "error", value = err)) } -param <- list(max.depth=2, eta=1, nthread = 2, +param <- list(max_depth=2, eta=1, nthread = 2, objective=logregobj, eval_metric=evalerror) num_round <- 2 test_that("custom objective works", { bst <- xgb.train(param, dtrain, num_round, watchlist) expect_equal(class(bst), "xgb.Booster") - expect_equal(length(bst$raw), 1064) - expect_true(!is.null(bst$evaluation_log)) - expect_true(!is.null(bst$evaluation_log$eval_error)) + expect_equal(length(bst$raw), 1094) + expect_false(is.null(bst$evaluation_log)) + expect_false(is.null(bst$evaluation_log$eval_error)) expect_lt(bst$evaluation_log[num_round, eval_error], 0.03) }) test_that("custom objective in CV works", { cv <- xgb.cv(param, dtrain, num_round, nfold=10, verbose=FALSE) - expect_true(!is.null(cv$evaluation_log)) + expect_false(is.null(cv$evaluation_log)) expect_equal(dim(cv$evaluation_log), c(2, 5)) expect_lt(cv$evaluation_log[num_round, test_error_mean], 0.03) }) @@ -58,5 +58,5 @@ test_that("custom objective using DMatrix attr works", { param$objective = logregobjattr bst <- xgb.train(param, dtrain, num_round, watchlist) expect_equal(class(bst), "xgb.Booster") - expect_equal(length(bst$raw), 1064) + expect_equal(length(bst$raw), 1094) }) diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index 30def0c2f..64ffd7f58 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -14,18 +14,18 @@ df[,ID := NULL] sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df) label <- df[, ifelse(Improved == "Marked", 1, 0)] -bst.Tree <- xgboost(data = sparse_matrix, label = label, max.depth = 9, - eta = 1, nthread = 2, nround = 10, objective = "binary:logistic", booster = "gbtree") +bst.Tree <- xgboost(data = sparse_matrix, label = label, max_depth = 9, + eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic", booster = "gbtree") bst.GLM <- xgboost(data = sparse_matrix, label = label, - eta = 1, nthread = 2, nround = 10, objective = "binary:logistic", booster = "gblinear") + eta = 1, nthread = 2, nrounds = 10, objective = "binary:logistic", booster = "gblinear") feature.names <- colnames(sparse_matrix) test_that("xgb.dump works", { expect_length(xgb.dump(bst.Tree), 172) expect_length(xgb.dump(bst.GLM), 14) - expect_true(xgb.dump(bst.Tree, 'xgb.model.dump', with.stats = T)) + expect_true(xgb.dump(bst.Tree, 'xgb.model.dump', with_stats = T)) expect_true(file.exists('xgb.model.dump')) expect_gt(file.size('xgb.model.dump'), 8000) }) @@ -35,12 +35,15 @@ test_that("xgb-attribute functionality", { list.val <- list(my_attr=val, a=123, b='ok') list.ch <- 
   list.ch <- lapply(list.ch, as.character)
+  # note: iter is 0-index in xgb attributes
+  list.default <- list(niter = "9")
+  list.ch <- c(list.ch, list.default)
   # proper input:
   expect_error(xgb.attr(bst.Tree, NULL))
   expect_error(xgb.attr(val, val))
   # set & get:
   expect_null(xgb.attr(bst.Tree, "asdf"))
-  expect_null(xgb.attributes(bst.Tree)) # initially, expect no attributes
+  expect_equal(xgb.attributes(bst.Tree), list.default)
   xgb.attr(bst.Tree, "my_attr") <- val
   expect_equal(xgb.attr(bst.Tree, "my_attr"), val)
   xgb.attributes(bst.Tree) <- list.val
@@ -53,8 +56,10 @@ test_that("xgb-attribute functionality", {
   # deletion:
   xgb.attr(bst, "my_attr") <- NULL
   expect_null(xgb.attr(bst, "my_attr"))
-  expect_equal(xgb.attributes(bst), list.ch[c("a", "b")])
+  expect_equal(xgb.attributes(bst), list.ch[c("a", "b", "niter")])
   xgb.attributes(bst) <- list(a=NULL, b=NULL)
+  expect_equal(xgb.attributes(bst), list.default)
+  xgb.attributes(bst) <- list(niter=NULL)
   expect_null(xgb.attributes(bst))
 })
 
@@ -88,10 +93,27 @@ test_that("xgb.plot.tree works with and without feature names", {
 })
 
 test_that("xgb.plot.multi.trees works with and without feature names", {
-  xgb.plot.multi.trees(model = bst.Tree, feature_names = feature.names, features.keep = 3)
-  xgb.plot.multi.trees(model = bst.Tree, features.keep = 3)
+  xgb.plot.multi.trees(model = bst.Tree, feature_names = feature.names, features_keep = 3)
+  xgb.plot.multi.trees(model = bst.Tree, features_keep = 3)
 })
 
 test_that("xgb.plot.deepness works", {
   xgb.plot.deepness(model = bst.Tree)
 })
+
+test_that("check.deprecation works", {
+  ttt <- function(a = NULL, DUMMY = NULL, ...) {
+    check.deprecation(...)
+    as.list(environment())
+  }
+  res <- ttt(a = 1, DUMMY = 2, z = 3)
+  expect_equal(res, list(a = 1, DUMMY = 2))
+  expect_warning(
+    res <- ttt(a = 1, dummy = 22, z = 3)
+  , "\'dummy\' is deprecated")
+  expect_equal(res, list(a = 1, DUMMY = 22))
+  expect_warning(
+    res <- ttt(a = 1, dumm = 22, z = 3)
+  , "\'dumm\' was partially matched to \'dummy\'")
+  expect_equal(res, list(a = 1, DUMMY = 22))
+})
diff --git a/R-package/tests/testthat/test_parameter_exposure.R b/R-package/tests/testthat/test_parameter_exposure.R
index 4f3a9549e..1a0dcb39f 100644
--- a/R-package/tests/testthat/test_parameter_exposure.R
+++ b/R-package/tests/testthat/test_parameter_exposure.R
@@ -11,13 +11,13 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 bst <- xgboost(data = dtrain,
                max_depth = 2,
                eta = 1,
-               nround = 10,
+               nrounds = 10,
                nthread = 1,
                verbose = 0,
                objective = "binary:logistic")
 
 test_that("call is exposed to R", {
-  expect_true(!is.null(bst$call))
+  expect_false(is.null(bst$call))
   expect_is(bst$call, "call")
 })