diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 34d47103f..d1c186975 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -8,29 +8,152 @@ train <- agaricus.train
 test <- agaricus.test
 set.seed(1994)
 
-test_that("train and predict", {
-  bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-                 eta = 1, nthread = 2, nround = 2, objective = "binary:logistic")
+test_that("train and predict binary classification", {
+  nround = 2
+  expect_output(
+    bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
+                   eta = 1, nthread = 2, nround = nround, objective = "binary:logistic")
+  , "train-error")
+  expect_equal(class(bst), "xgb.Booster")
+
+  expect_true(!is.null(bst$evaluation_log))
+  expect_equal(nrow(bst$evaluation_log), nround)
+  expect_lt(bst$evaluation_log[, min(train_error)], 0.03)
+  expect_equal(bst$nboost, bst$ntree)
+
   pred <- predict(bst, test$data)
   expect_equal(length(pred), 1611)
 })
 
-test_that("early stopping", {
-  res <- xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5,
-                eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
-                early.stop.round = 3, maximize = FALSE)
-  expect_true(nrow(res) < 20)
-  bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-                 eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
-                 early.stop.round = 3, maximize = FALSE)
+test_that("train and predict softprob", {
+  expect_output(
+    bst <- xgboost(data = as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1,
+                   max.depth = 3, eta = 0.5, nthread = 2, nround = 5,
+                   objective = "multi:softprob", num_class=3)
+  , "train-merror")
+  expect_true(!is.null(bst$evaluation_log))
+  expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
+  expect_equal(bst$nboost * 3, bst$ntree)
+
+  pred <- predict(bst, as.matrix(iris[, -5]))
+  expect_equal(length(pred), nrow(iris) * 3)
+})
+
+test_that("train and predict softmax", {
+  expect_output(
+    bst <- xgboost(data = as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1,
+                   max.depth = 3, eta = 0.15, nthread = 2, nround = 25,
+                   objective = "multi:softmax", num_class=3)
+  , "train-merror")
+  expect_true(!is.null(bst$evaluation_log))
+  expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
+  expect_equal(bst$nboost * 3, bst$ntree)
+
+  pred <- predict(bst, as.matrix(iris[, -5]))
+  expect_equal(length(pred), nrow(iris))
+})
+
+test_that("early stopping xgb.train works", {
+  expect_output(
+    bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
+                   eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
+                   early.stop.round = 3, maximize = FALSE)
+  , "Stopping. Best iteration")
+  expect_true(!is.null(bst$best_iteration))
+  expect_lt(bst$best_iteration, 19)
+  expect_equal(bst$nboost, bst$ntree)
+  expect_equal(bst$best_iteration, bst$best_ntreelimit)
+
   pred <- predict(bst, test$data)
   expect_equal(length(pred), 1611)
 })
 
-test_that("save_period", {
-  bst <- xgboost(data = train$data, label = train$label, max.depth = 2,
-                 eta = 0.3, nthread = 2, nround = 20, objective = "binary:logistic",
-                 save_period = 10, save_name = "xgb.model")
-  pred <- predict(bst, test$data)
-  expect_equal(length(pred), 1611)
+test_that("training continuation works", {
+  dtrain <- xgb.DMatrix(train$data, label = train$label)
+  watchlist = list(train=dtrain)
+  param <- list(objective = "binary:logistic", max.depth = 2, eta = 1, nthread = 2)
+
+  # for the reference, use 4 iterations at once:
+  set.seed(11)
+  bst <- xgb.train(param, dtrain, nrounds = 4, watchlist)
+  # first two iterations:
+  set.seed(11)
+  bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist)
+  # continue for two more:
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = bst1)
+  expect_equal(bst$raw, bst2$raw)
+  expect_true(!is.null(bst2$evaluation_log))
+  expect_equal(dim(bst2$evaluation_log), c(4, 2))
+  expect_equal(bst2$evaluation_log, bst$evaluation_log)
+  # test continuing from raw model data
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = bst1$raw)
+  expect_equal(bst$raw, bst2$raw)
+  expect_equal(dim(bst2$evaluation_log), c(2, 2))
+  # test continuing from a model in file
+  xgb.save(bst1, "xgboost.model")
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, xgb_model = "xgboost.model")
+  expect_equal(bst$raw, bst2$raw)
+  expect_equal(dim(bst2$evaluation_log), c(2, 2))
+})
+
+
+test_that("xgb.cv works", {
+  cv <- xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5,
+               eta = 1., nthread = 2, nround = 2, objective = "binary:logistic",
+               verbose=TRUE)
+  expect_is(cv, 'xgb.cv.synchronous')
+  expect_true(!is.null(cv$evaluation_log))
+  expect_lt(cv$evaluation_log[, min(test_error_mean)], 0.03)
+  expect_lt(cv$evaluation_log[, min(test_error_std)], 0.004)
+  expect_equal(cv$nboost, cv$ntree)
+  expect_true(!is.null(cv$folds) && is.list(cv$folds))
+  expect_length(cv$folds, 5)
+  expect_true(!is.null(cv$params) && is.list(cv$params))
+  expect_true(!is.null(cv$callbacks))
+  expect_true(!is.null(cv$call))
+})
+
+test_that("early stopping xgb.cv works", {
+  expect_output(
+    cv <- xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5,
+                 eta = 0.5, nthread = 2, nround = 20, objective = "binary:logistic",
+                 early.stop.round = 3, maximize = FALSE, verbose=TRUE)
+  , "Stopping. Best iteration")
+  expect_true(!is.null(cv$best_iteration))
+  expect_lt(cv$best_iteration, 19)
+  expect_equal(cv$nboost, cv$ntree)
+  expect_equal(cv$best_iteration, cv$best_ntreelimit)
+})
+
+test_that("prediction in xgb.cv works", {
+  nround = 2
+  cv <- xgb.cv(data = train$data, label = train$label, max.depth = 2, nfold = 5,
+               eta = 0.5, nthread = 2, nround = nround, objective = "binary:logistic",
+               verbose=TRUE, prediction = TRUE)
+  expect_true(!is.null(cv$evaluation_log))
+  expect_true(!is.null(cv$pred))
+  expect_length(cv$pred, nrow(train$data))
+  err_pred <- sum((cv$pred > 0.5) != train$label) / length(train$label)
+  err_log <- cv$evaluation_log[nround, test_error_mean]
+  expect_lt(abs(err_pred - err_log), 1e-5)
+})
+
+test_that("prediction in early-stopping xgb.cv works", {
+  set.seed(123)
+  # add some label noise
+  lb <- train$label
+  lb[sample(length(train$label), 2000)] <- 0
+  expect_output(
+    cv <- xgb.cv(data = train$data, label = lb, max.depth = 3, nfold = 5,
+                 eta = 1., nthread = 2, nround = 20, objective = "binary:logistic",
+                 early.stop.round = 3, maximize = FALSE, verbose=TRUE, prediction = TRUE)
+  , "Stopping. Best iteration")
+  expect_true(!is.null(cv$best_iteration))
+  expect_lt(cv$best_iteration, 19)
+  expect_true(!is.null(cv$evaluation_log))
+  expect_true(!is.null(cv$pred))
+  expect_length(cv$pred, nrow(train$data))
+  err_pred <- sum((cv$pred > 0.5) != lb) / length(lb)
+  err_log <- cv$evaluation_log[cv$best_iteration, test_error_mean]
+  expect_lt(abs(err_pred - err_log), 1e-5)
 })
diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R
new file mode 100644
index 000000000..bf9239a85
--- /dev/null
+++ b/R-package/tests/testthat/test_callbacks.R
@@ -0,0 +1,174 @@
+# More specific testing of callbacks
+
+require(xgboost)
+require(data.table)
+
+context("callbacks")
+
+data(agaricus.train, package='xgboost')
+train <- agaricus.train
+
+dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+watchlist = list(train=dtrain)
+param <- list(objective = "binary:logistic", max.depth = 2, nthread = 2)
+
+
+test_that("cb.print_evaluation works as expected", {
+
+  bst_evaluation <- c('train-auc'=0.9, 'test-auc'=0.8)
+  bst_evaluation_err <- NULL
+  begin_iteration <- 1
+  end_iteration <- 7
+
+  f0 <- cb.print_evaluation(period=0)
+  f1 <- cb.print_evaluation(period=1)
+  f5 <- cb.print_evaluation(period=5)
+
+  expect_true(!is.null(attr(f1, 'call')))
+  expect_equal(attr(f1, 'name'), 'cb.print_evaluation')
+
+  iteration <- 1
+  expect_silent(f0())
+  expect_output(f1(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
+  expect_output(f5(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
+  expect_null(f1())
+
+  iteration <- 2
+  expect_output(f1(), "\\[2\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
+  expect_silent(f5())
+
+  iteration <- 7
+  expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
+  expect_output(f5(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
+
+  bst_evaluation_err <- c('train-auc'=0.1, 'test-auc'=0.2)
+  expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\\+0.100000\ttest-auc:0.800000\\+0.200000")
+})
+
+test_that("cb.log_evaluation works as expected", {
+
+  bst_evaluation <- c('train-auc'=0.9, 'test-auc'=0.8)
+  bst_evaluation_err <- NULL
+
+  evaluation_log <- list()
+  f <- cb.log_evaluation()
+
+  expect_true(!is.null(attr(f, 'call')))
+  expect_equal(attr(f, 'name'), 'cb.log_evaluation')
+
+  iteration <- 1
+  expect_silent(f())
+  expect_equal(evaluation_log,
+               list(c(iter=1, bst_evaluation)))
+  iteration <- 2
+  expect_silent(f())
+  expect_equal(evaluation_log,
+               list(c(iter=1, bst_evaluation), c(iter=2, bst_evaluation)))
+  expect_silent(f(finalize = TRUE))
+  expect_equal(evaluation_log,
+               data.table(iter=1:2, train_auc=c(0.9,0.9), test_auc=c(0.8,0.8)))
+
+  bst_evaluation_err <- c('train-auc'=0.1, 'test-auc'=0.2)
+  evaluation_log <- list()
+  f <- cb.log_evaluation()
+
+  iteration <- 1
+  expect_silent(f())
+  expect_equal(evaluation_log,
+               list(c(iter=1, c(bst_evaluation, bst_evaluation_err))))
+  iteration <- 2
+  expect_silent(f())
+  expect_equal(evaluation_log,
+               list(c(iter=1, c(bst_evaluation, bst_evaluation_err)),
+                    c(iter=2, c(bst_evaluation, bst_evaluation_err))))
+  expect_silent(f(finalize = TRUE))
+  expect_equal(evaluation_log,
+               data.table(iter=1:2,
+                          train_auc_mean=c(0.9,0.9), train_auc_std=c(0.1,0.1),
+                          test_auc_mean=c(0.8,0.8), test_auc_std=c(0.2,0.2)))
+})
+
+test_that("cb.reset_parameters works as expected", {
+
+  # fixed eta
+  set.seed(111)
+  bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9)
+  expect_true(!is.null(bst0$evaluation_log))
+  expect_true(!is.null(bst0$evaluation_log$train_error))
+
+  # same eta but re-set as a vector parameter in the callback
+  set.seed(111)
+  my_par <- list(eta = c(0.9, 0.9))
+  bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+                    callbacks = list(cb.reset_parameters(my_par)))
+  expect_true(!is.null(bst1$evaluation_log$train_error))
+  expect_equal(bst0$evaluation_log$train_error,
+               bst1$evaluation_log$train_error)
+
+  # same eta but re-set via a function in the callback
+  set.seed(111)
+  my_par <- list(eta = function(itr, itr_end) 0.9)
+  bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+                    callbacks = list(cb.reset_parameters(my_par)))
+  expect_true(!is.null(bst2$evaluation_log$train_error))
+  expect_equal(bst0$evaluation_log$train_error,
+               bst2$evaluation_log$train_error)
+
+  # different eta re-set as a vector parameter in the callback
+  set.seed(111)
+  my_par <- list(eta = c(0.6, 0.5))
+  bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+                    callbacks = list(cb.reset_parameters(my_par)))
+  expect_true(!is.null(bst3$evaluation_log$train_error))
+  expect_true(!all(bst0$evaluation_log$train_error == bst3$evaluation_log$train_error))
+
+  # resetting multiple parameters at the same time runs with no error
+  my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), alpha = c(0.01, 0.02))
+  expect_error(
+    bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+                      callbacks = list(cb.reset_parameters(my_par)))
+  , NA)
+
+  # expect no learning with 0 learning rate
+  my_par <- list(eta = c(0., 0.))
+  bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+                    callbacks = list(cb.reset_parameters(my_par)))
+  expect_true(!is.null(bstX$evaluation_log$train_error))
+  er <- unique(bstX$evaluation_log$train_error)
+  expect_length(er, 1)
+  expect_gt(er, 0.4)
+})
+
+# Note: early stopping is tested in test_basic
+
+test_that("cb.save_model works as expected", {
+  files <- c('xgboost_01.model', 'xgboost_02.model', 'xgboost.model')
+  for (f in files) if (file.exists(f)) file.remove(f)
+
+  bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+                   save_period = 1, save_name = "xgboost_%02d.model")
+  expect_true(file.exists('xgboost_01.model'))
+  expect_true(file.exists('xgboost_02.model'))
+  b1 <- xgb.load('xgboost_01.model')
+  expect_length(grep('^booster', xgb.dump(b1)), 1)
+  b2 <- xgb.load('xgboost_02.model')
+  expect_equal(bst$raw, b2$raw)
+
+  # save_period = 0 saves the last iteration's model
+  bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, save_period = 0)
+  expect_true(file.exists('xgboost.model'))
+  b2 <- xgb.load('xgboost.model')
+  expect_equal(bst$raw, b2$raw)
+
+  for (f in files) if (file.exists(f)) file.remove(f)
+})
+
+test_that("can store evaluation_log without printing", {
+  expect_silent(
+    bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1,
+                     verbose = 0, callbacks = list(cb.log_evaluation()))
+  )
+  expect_true(!is.null(bst$evaluation_log))
+  expect_true(!is.null(bst$evaluation_log$train_error))
+  expect_lt(bst$evaluation_log[2, train_error], 0.03)
+})
diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
index 7407246c6..47dc61e18 100644
--- a/R-package/tests/testthat/test_custom_objective.R
+++ b/R-package/tests/testthat/test_custom_objective.R
@@ -2,35 +2,50 @@ context('Test models with custom objective')
 
 require(xgboost)
 
+set.seed(1994)
+
 data(agaricus.train, package='xgboost')
 data(agaricus.test, package='xgboost')
 dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
+watchlist <- list(eval = dtest, train = dtrain)
+
+logregobj <- function(preds, dtrain) {
+  labels <- getinfo(dtrain, "label")
+  preds <- 1 / (1 + exp(-preds))
+  grad <- preds - labels
+  hess <- preds * (1 - preds)
+  return(list(grad = grad, hess = hess))
+}
+
+evalerror <- function(preds, dtrain) {
+  labels <- getinfo(dtrain, "label")
+  err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
+  return(list(metric = "error", value = err))
+}
+
+param <- list(max.depth=2, eta=1, nthread = 2,
+              objective=logregobj, eval_metric=evalerror)
+num_round <- 2
 
 test_that("custom objective works", {
-
-  watchlist <- list(eval = dtest, train = dtrain)
-  num_round <- 2
-
-  logregobj <- function(preds, dtrain) {
-    labels <- getinfo(dtrain, "label")
-    preds <- 1 / (1 + exp(-preds))
-    grad <- preds - labels
-    hess <- preds * (1 - preds)
-    return(list(grad = grad, hess = hess))
-  }
-  evalerror <- function(preds, dtrain) {
-    labels <- getinfo(dtrain, "label")
-    err <- as.numeric(sum(labels != (preds > 0))) / length(labels)
-    return(list(metric = "error", value = err))
-  }
-
-  param <- list(max.depth=2, eta=1, nthread = 2, silent=1,
-                objective=logregobj, eval_metric=evalerror)
-
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
   expect_equal(length(bst$raw), 1064)
+  expect_true(!is.null(bst$evaluation_log))
+  expect_true(!is.null(bst$evaluation_log$eval_error))
+  expect_lt(bst$evaluation_log[num_round, eval_error], 0.03)
+})
+
+test_that("custom objective in CV works", {
+  cv <- xgb.cv(param, dtrain, num_round, nfold=10, verbose=FALSE)
+  expect_true(!is.null(cv$evaluation_log))
+  expect_equal(dim(cv$evaluation_log), c(2, 5))
+  expect_lt(cv$evaluation_log[num_round, test_error_mean], 0.03)
+})
+
+test_that("custom objective using DMatrix attr works", {
+  attr(dtrain, 'label') <- getinfo(dtrain, 'label')
 
   logregobjattr <- function(preds, dtrain) {
@@ -40,8 +55,7 @@ test_that("custom objective works", {
     hess <- preds * (1 - preds)
     return(list(grad = grad, hess = hess))
   }
-  param <- list(max.depth=2, eta=1, nthread = 2, silent = 1,
-                objective = logregobjattr, eval_metric = evalerror)
+  param$objective = logregobjattr
   bst <- xgb.train(param, dtrain, num_round, watchlist)
   expect_equal(class(bst), "xgb.Booster")
   expect_equal(length(bst$raw), 1064)
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index 10af643b8..30def0c2f 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -7,25 +7,27 @@ require(vcd)
 set.seed(1982)
 data(Arthritis)
-data(agaricus.train, package='xgboost')
 df <- data.table(Arthritis, keep.rownames = F)
 df[,AgeDiscret := as.factor(round(Age / 10,0))]
 df[,AgeCat := as.factor(ifelse(Age > 30, "Old", "Young"))]
 df[,ID := NULL]
 sparse_matrix <- sparse.model.matrix(Improved~.-1, data = df)
-output_vector <- df[,Y := 0][Improved == "Marked",Y := 1][,Y]
-bst.Tree <- xgboost(data = sparse_matrix, label = output_vector, max.depth = 9,
+label <- df[, ifelse(Improved == "Marked", 1, 0)]
+
+bst.Tree <- xgboost(data = sparse_matrix, label = label, max.depth = 9,
                     eta = 1, nthread = 2, nround = 10, objective = "binary:logistic",
                     booster = "gbtree")
-bst.GLM <- xgboost(data = sparse_matrix, label = output_vector,
+bst.GLM <- xgboost(data = sparse_matrix, label = label,
                    eta = 1, nthread = 2, nround = 10, objective = "binary:logistic",
                    booster = "gblinear")
 
-feature.names <- colnames(agaricus.train$data)
+feature.names <- colnames(sparse_matrix)
 
 test_that("xgb.dump works", {
-  capture.output(print(xgb.dump(bst.Tree)))
-  capture.output(print(xgb.dump(bst.GLM)))
+  expect_length(xgb.dump(bst.Tree), 172)
+  expect_length(xgb.dump(bst.GLM), 14)
   expect_true(xgb.dump(bst.Tree, 'xgb.model.dump', with.stats = T))
+  expect_true(file.exists('xgb.model.dump'))
+  expect_gt(file.size('xgb.model.dump'), 8000)
 })
 
 test_that("xgb-attribute functionality", {
@@ -61,19 +63,19 @@ test_that("xgb.model.dt.tree works with and without feature names", {
   dt.tree <- xgb.model.dt.tree(feature_names = feature.names, model = bst.Tree)
   expect_equal(names.dt.trees, names(dt.tree))
   expect_equal(dim(dt.tree), c(162, 10))
-  xgb.model.dt.tree(model = bst.Tree)
+  expect_output(str(xgb.model.dt.tree(model = bst.Tree)), 'Feature.*\\"3\\"')
 })
 
 test_that("xgb.importance works with and without feature names", {
-  importance.Tree <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst.Tree)
+  importance.Tree <- xgb.importance(feature_names = feature.names, model = bst.Tree)
   expect_equal(dim(importance.Tree), c(7, 4))
   expect_equal(colnames(importance.Tree), c("Feature", "Gain", "Cover", "Frequency"))
-  xgb.importance(model = bst.Tree)
+  expect_output(str(xgb.importance(model = bst.Tree)), 'Feature.*\\"3\\"')
   xgb.plot.importance(importance_matrix = importance.Tree)
 })
 
 test_that("xgb.importance works with GLM model", {
-  importance.GLM <- xgb.importance(feature_names = sparse_matrix@Dimnames[[2]], model = bst.GLM)
+  importance.GLM <- xgb.importance(feature_names = feature.names, model = bst.GLM)
   expect_equal(dim(importance.GLM), c(10, 2))
   expect_equal(colnames(importance.GLM), c("Feature", "Weight"))
   xgb.importance(model = bst.GLM)
diff --git a/R-package/tests/testthat/test_parameter_exposure.R b/R-package/tests/testthat/test_parameter_exposure.R
index 769059b76..4f3a9549e 100644
--- a/R-package/tests/testthat/test_parameter_exposure.R
+++ b/R-package/tests/testthat/test_parameter_exposure.R
@@ -9,7 +9,7 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
 dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
 
 bst <- xgboost(data = dtrain,
-               max.depth = 2,
+               max_depth = 2,
                eta = 1,
                nround = 10,
                nthread = 1,
@@ -17,16 +17,14 @@ bst <- xgboost(data = dtrain,
                objective = "binary:logistic")
 
 test_that("call is exposed to R", {
-  model_call <- attr(bst, "call")
-  expect_is(model_call, "call")
+  expect_true(!is.null(bst$call))
+  expect_is(bst$call, "call")
 })
 
 test_that("params is exposed to R", {
-  model_params <- attr(bst, "params")
-
+  model_params <- bst$params
   expect_is(model_params, "list")
-
   expect_equal(model_params$eta, 1)
-  expect_equal(model_params$max.depth, 2)
+  expect_equal(model_params$max_depth, 2)
   expect_equal(model_params$objective, "binary:logistic")
 })
diff --git a/R-package/tests/testthat/test_poisson_regression.R b/R-package/tests/testthat/test_poisson_regression.R
index 5473d930f..a48f2fc47 100644
--- a/R-package/tests/testthat/test_poisson_regression.R
+++ b/R-package/tests/testthat/test_poisson_regression.R
@@ -5,10 +5,10 @@ set.seed(1994)
 
 test_that("poisson regression works", {
   data(mtcars)
-  bst <- xgboost(data = as.matrix(mtcars[,-11]),label = mtcars[,11],
-                 objective = 'count:poisson', nrounds=5)
+  bst <- xgboost(data = as.matrix(mtcars[,-11]), label = mtcars[,11],
+                 objective = 'count:poisson', nrounds=10, verbose=0)
   expect_equal(class(bst), "xgb.Booster")
-  pred <- predict(bst,as.matrix(mtcars[, -11]))
+  pred <- predict(bst, as.matrix(mtcars[, -11]))
   expect_equal(length(pred), 32)
-  expect_less_than(sqrt(mean( (pred - mtcars[,11]) ^ 2)), 2.5)
+  expect_lt(sqrt(mean( (pred - mtcars[,11])^2 )), 1.2)
 })
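To reproduce these results locally, a minimal sketch of running the touched suites (assuming devtools and testthat are installed; the `filter` values below are illustrative, not part of this diff):

```r
# Sketch: run the R-package tests from this diff.
# Assumes the working directory is the R-package/ folder of an xgboost
# checkout, with xgboost, testthat, devtools, data.table, and vcd installed.
library(devtools)

devtools::test()                      # run the whole testthat suite

devtools::test(filter = "basic")      # just test_basic.R
devtools::test(filter = "callbacks")  # just test_callbacks.R
```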