diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
index f71aaa71f..2fddfa403 100644
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -5,8 +5,8 @@
 #' @export
 xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
                     params = list(), nrounds,
-                    verbose = 1, print_every_n = 1L,
-                    early_stopping_rounds = NULL, maximize = NULL,
+                    verbose = 1, print_every_n = 1L,
+                    early_stopping_rounds = NULL, maximize = NULL,
                     save_period = NULL, save_name = "xgboost.model",
                     xgb_model = NULL, callbacks = list(), ...) {
@@ -18,16 +18,16 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
                    early_stopping_rounds = early_stopping_rounds, maximize = maximize,
                    save_period = save_period, save_name = save_name,
                    xgb_model = xgb_model, callbacks = callbacks, ...)
-  return(bst)
+  return (bst)
 }
 
 #' Training part from Mushroom Data Set
-#'
+#'
 #' This data set is originally from the Mushroom data set,
 #' UCI Machine Learning Repository.
-#'
+#'
 #' This data set includes the following fields:
-#'
+#'
 #' \itemize{
 #'  \item \code{label} the label for each record
 #'  \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
@@ -35,16 +35,16 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
 #'
 #' @references
 #' https://archive.ics.uci.edu/ml/datasets/Mushroom
-#'
-#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
-#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
+#'
+#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
+#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
 #' School of Information and Computer Science.
-#'
+#'
 #' @docType data
 #' @keywords datasets
 #' @name agaricus.train
 #' @usage data(agaricus.train)
-#' @format A list containing a label vector, and a dgCMatrix object with 6513
+#' @format A list containing a label vector, and a dgCMatrix object with 6513
 #' rows and 127 variables
 NULL
 
@@ -52,9 +52,9 @@ NULL
 #'
 #' This data set is originally from the Mushroom data set,
 #' UCI Machine Learning Repository.
-#'
+#'
 #' This data set includes the following fields:
-#'
+#'
 #' \itemize{
 #'  \item \code{label} the label for each record
 #'  \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
@@ -62,16 +62,16 @@ NULL
 #'
 #' @references
 #' https://archive.ics.uci.edu/ml/datasets/Mushroom
-#'
-#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
-#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
+#'
+#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
+#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
 #' School of Information and Computer Science.
-#'
+#'
 #' @docType data
 #' @keywords datasets
 #' @name agaricus.test
 #' @usage data(agaricus.test)
-#' @format A list containing a label vector, and a dgCMatrix object with 1611
+#' @format A list containing a label vector, and a dgCMatrix object with 1611
 #' rows and 126 variables
 NULL
 
@@ -107,7 +107,7 @@ NULL
 #' @importFrom graphics par
 #' @importFrom graphics title
 #' @importFrom grDevices rgb
-#'
+#'
 #' @import methods
 #' @useDynLib xgboost, .registration = TRUE
 NULL
diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc
index c929ba204..c9083177d 100644
--- a/R-package/src/xgboost_R.cc
+++ b/R-package/src/xgboost_R.cc
@@ -313,7 +313,7 @@ SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask,
                                 R_ExternalPtrAddr(dmat),
                                 asInteger(option_mask),
                                 asInteger(ntree_limit),
-                                0,
+                                asInteger(training),
                                 &olen, &res));
   ret = PROTECT(allocVector(REALSXP, olen));
   for (size_t i = 0; i < olen; ++i) {
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 36c148a99..97b90f7a1 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -27,7 +27,7 @@ test_that("train and predict binary classification", {
 
   pred <- predict(bst, test$data)
   expect_length(pred, 1611)
-
+
   pred1 <- predict(bst, train$data, ntreelimit = 1)
   expect_length(pred1, 6513)
   err_pred1 <- sum((pred1 > 0.5) != train$label)/length(train$label)
@@ -35,6 +35,54 @@ test_that("train and predict binary classification", {
   expect_lt(abs(err_pred1 - err_log), 10e-6)
 })
 
+test_that("dart prediction works", {
+  nrounds = 32
+  set.seed(1994)
+
+  d <- cbind(
+    x1 = rnorm(100),
+    x2 = rnorm(100),
+    x3 = rnorm(100))
+  y <- d[,"x1"] + d[,"x2"]^2 +
+    ifelse(d[,"x3"] > .5, d[,"x3"]^2, 2^d[,"x3"]) +
+    rnorm(100)
+
+  set.seed(1994)
+  booster_by_xgboost <- xgboost(data = d, label = y, max_depth = 2, booster = "dart",
+                                rate_drop = 0.5, one_drop = TRUE,
+                                eta = 1, nthread = 2, nrounds = nrounds, objective = "reg:squarederror")
+  pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, ntreelimit = 0)
+  pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, ntreelimit = nrounds)
+  expect_true(all(matrix(pred_by_xgboost_0, byrow=TRUE) == matrix(pred_by_xgboost_1, byrow=TRUE)))
+
+  pred_by_xgboost_2 <- predict(booster_by_xgboost, newdata = d, training = TRUE)
+  expect_false(all(matrix(pred_by_xgboost_0, byrow=TRUE) == matrix(pred_by_xgboost_2, byrow=TRUE)))
+
+  set.seed(1994)
+  dtrain <- xgb.DMatrix(data=d, info = list(label=y))
+  booster_by_train <- xgb.train( params = list(
+    booster = "dart",
+    max_depth = 2,
+    eta = 1,
+    rate_drop = 0.5,
+    one_drop = TRUE,
+    nthread = 1,
+    tree_method= "exact",
+    verbosity = 3,
+    objective = "reg:squarederror"
+  ),
+  data = dtrain,
+  nrounds = nrounds
+  )
+  pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, ntreelimit = 0)
+  pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, ntreelimit = nrounds)
+  pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)
+
+  expect_true(all(matrix(pred_by_train_0, byrow=TRUE) == matrix(pred_by_xgboost_0, byrow=TRUE)))
+  expect_true(all(matrix(pred_by_train_1, byrow=TRUE) == matrix(pred_by_xgboost_1, byrow=TRUE)))
+  expect_true(all(matrix(pred_by_train_2, byrow=TRUE) == matrix(pred_by_xgboost_2, byrow=TRUE)))
+})
+
 test_that("train and predict softprob", {
   lb <- as.numeric(iris$Species) - 1
   set.seed(11)
@@ -74,7 +122,7 @@ test_that("train and predict softmax", {
   expect_false(is.null(bst$evaluation_log))
   expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
   expect_equal(bst$niter * 3, xgb.ntree(bst))
-
+
   pred <- predict(bst, as.matrix(iris[, -5]))
   expect_length(pred, nrow(iris))
   err <- sum(pred != lb)/length(lb)
@@ -90,12 +138,12 @@ test_that("train and predict RF", {
                  num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
   expect_equal(bst$niter, 1)
   expect_equal(xgb.ntree(bst), 20)
-
+
   pred <- predict(bst, train$data)
   pred_err <- sum((pred > 0.5) != lb)/length(lb)
   expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
   #expect_lt(pred_err, 0.03)
-
+
   pred <- predict(bst, train$data, ntreelimit = 20)
   pred_err_20 <- sum((pred > 0.5) != lb)/length(lb)
   expect_equal(pred_err_20, pred_err)
@@ -211,7 +259,7 @@ test_that("train and predict with non-strict classes", {
   bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
                  eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
   pr0 <- predict(bst, train_dense)
-
+
   # dense matrix-like input of non-matrix class
   class(train_dense) <- 'shmatrix'
   expect_true(is.matrix(train_dense))
@@ -221,7 +269,7 @@ test_that("train and predict with non-strict classes", {
                , regexp = NA)
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)
-
+
   # dense matrix-like input of non-matrix class with some inheritance
   class(train_dense) <- c('pphmatrix','shmatrix')
   expect_true(is.matrix(train_dense))
@@ -231,7 +279,7 @@ test_that("train and predict with non-strict classes", {
                , regexp = NA)
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)
-
+
   # when someone inhertis from xgb.Booster, it should still be possible to use it as xgb.Booster
   class(bst) <- c('super.Booster', 'xgb.Booster')
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index a71ce4692..09d1e73df 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -157,7 +157,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
     params = c(
       list(
         booster = booster,
-        objective = "reg:linear",
+        objective = "reg:squarederror",
         eval_metric = "rmse"),
       if (booster == "dart")
         list(rate_drop = .01, one_drop = T)),
diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc
index 07bf17f8e..abd2b9fa8 100644
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -435,9 +435,9 @@ class Dart : public GBTree {
       std::fill(out_preds.begin(), out_preds.end(),
                 model_.learner_model_param_->base_score);
     }
-
-    PredLoopSpecalize(p_fmat, &out_preds, num_group, 0,
-                      ntree_limit, training);
+    const int nthread = omp_get_max_threads();
+    InitThreadTemp(nthread);
+    PredLoopSpecalize(p_fmat, &out_preds, num_group, 0, ntree_limit);
   }
 
   void PredictInstance(const SparsePage::Inst &inst,
@@ -489,11 +489,8 @@ class Dart : public GBTree {
                          std::vector<bst_float>* out_preds,
                          int num_group,
                          unsigned tree_begin,
-                         unsigned tree_end,
-                         bool training) {
-    const int nthread = omp_get_max_threads();
+                         unsigned tree_end) {
     CHECK_EQ(num_group, model_.learner_model_param_->num_output_group);
-    InitThreadTemp(nthread);
     std::vector<bst_float>& preds = *out_preds;
     CHECK_EQ(model_.param.size_leaf_vector, 0)
         << "size_leaf_vector is enforced to 0 so far";
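
Not part of the patch: a minimal R usage sketch of the behaviour the new "dart prediction works" test exercises, assuming the agaricus demo data bundled with the package. With a DART booster, `predict(..., training = TRUE)` (the flag routed through `asInteger(training)` above) applies dropout as it does during boosting, so its output generally differs from the deterministic inference-time prediction.

# Usage sketch only; not part of the patch. Assumes the agaricus demo data
# shipped with the xgboost R package.
library(xgboost)
data(agaricus.train, package = "xgboost")

# A small DART booster; rate_drop > 0 so dropout can actually change predictions.
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               booster = "dart", rate_drop = 0.5, one_drop = TRUE,
               max_depth = 2, eta = 1, nthread = 2, nrounds = 16,
               objective = "binary:logistic", verbose = 0)

# Inference path: all trees are used, predictions are deterministic.
p_infer <- predict(bst, agaricus.train$data)

# Training path: trees are dropped as during boosting, so the result is
# expected to differ from p_infer (this is what the new test checks).
p_train <- predict(bst, agaricus.train$data, training = TRUE)

identical(p_infer, p_train)  # typically FALSE for DART with dropout enabled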