diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
index f71aaa71f..2fddfa403 100644
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -5,8 +5,8 @@
 #' @export
 xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
                     params = list(), nrounds,
-                    verbose = 1, print_every_n = 1L,
-                    early_stopping_rounds = NULL, maximize = NULL,
+                    verbose = 1, print_every_n = 1L,
+                    early_stopping_rounds = NULL, maximize = NULL,
                     save_period = NULL, save_name = "xgboost.model",
                     xgb_model = NULL, callbacks = list(), ...) {
@@ -18,16 +18,16 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
                    early_stopping_rounds = early_stopping_rounds, maximize = maximize,
                    save_period = save_period, save_name = save_name,
                    xgb_model = xgb_model, callbacks = callbacks, ...)
-  return(bst)
+  return (bst)
 }
 
 #' Training part from Mushroom Data Set
-#'
+#'
 #' This data set is originally from the Mushroom data set,
 #' UCI Machine Learning Repository.
-#'
+#'
 #' This data set includes the following fields:
-#'
+#'
 #' \itemize{
 #'  \item \code{label} the label for each record
 #'  \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
@@ -35,16 +35,16 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
 #'
 #' @references
 #' https://archive.ics.uci.edu/ml/datasets/Mushroom
-#'
-#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
-#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
+#'
+#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
+#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
 #' School of Information and Computer Science.
-#'
+#'
 #' @docType data
 #' @keywords datasets
 #' @name agaricus.train
 #' @usage data(agaricus.train)
-#' @format A list containing a label vector, and a dgCMatrix object with 6513
+#' @format A list containing a label vector, and a dgCMatrix object with 6513
 #' rows and 127 variables
 NULL
 
@@ -52,9 +52,9 @@ NULL
 #'
 #' This data set is originally from the Mushroom data set,
 #' UCI Machine Learning Repository.
-#'
+#'
 #' This data set includes the following fields:
-#'
+#'
 #' \itemize{
 #'  \item \code{label} the label for each record
 #'  \item \code{data} a sparse Matrix of \code{dgCMatrix} class, with 126 columns.
@@ -62,16 +62,16 @@ NULL
 #'
 #' @references
 #' https://archive.ics.uci.edu/ml/datasets/Mushroom
-#'
-#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
-#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
+#'
+#' Bache, K. & Lichman, M. (2013). UCI Machine Learning Repository
+#' [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California,
 #' School of Information and Computer Science.
-#'
+#'
 #' @docType data
 #' @keywords datasets
 #' @name agaricus.test
 #' @usage data(agaricus.test)
-#' @format A list containing a label vector, and a dgCMatrix object with 1611
+#' @format A list containing a label vector, and a dgCMatrix object with 1611
 #' rows and 126 variables
 NULL
 
@@ -107,7 +107,7 @@ NULL
 #' @importFrom graphics par
 #' @importFrom graphics title
 #' @importFrom grDevices rgb
-#'
+#'
 #' @import methods
 #' @useDynLib xgboost, .registration = TRUE
 NULL
diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc
index c929ba204..c9083177d 100644
--- a/R-package/src/xgboost_R.cc
+++ b/R-package/src/xgboost_R.cc
@@ -313,7 +313,7 @@ SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask,
                                 R_ExternalPtrAddr(dmat),
                                 asInteger(option_mask),
                                 asInteger(ntree_limit),
-                                0,
+                                asInteger(training),
                                 &olen, &res));
   ret = PROTECT(allocVector(REALSXP, olen));
   for (size_t i = 0; i < olen; ++i) {
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 36c148a99..97b90f7a1 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -27,7 +27,7 @@ test_that("train and predict binary classification", {
 
   pred <- predict(bst, test$data)
   expect_length(pred, 1611)
-
+
   pred1 <- predict(bst, train$data, ntreelimit = 1)
   expect_length(pred1, 6513)
   err_pred1 <- sum((pred1 > 0.5) != train$label)/length(train$label)
@@ -35,6 +35,54 @@ test_that("train and predict binary classification", {
   expect_lt(abs(err_pred1 - err_log), 10e-6)
 })
 
+test_that("dart prediction works", {
+  nrounds = 32
+  set.seed(1994)
+
+  d <- cbind(
+    x1 = rnorm(100),
+    x2 = rnorm(100),
+    x3 = rnorm(100))
+  y <- d[,"x1"] + d[,"x2"]^2 +
+    ifelse(d[,"x3"] > .5, d[,"x3"]^2, 2^d[,"x3"]) +
+    rnorm(100)
+
+  set.seed(1994)
+  booster_by_xgboost <- xgboost(data = d, label = y, max_depth = 2, booster = "dart",
+                                rate_drop = 0.5, one_drop = TRUE,
+                                eta = 1, nthread = 2, nrounds = nrounds, objective = "reg:squarederror")
+  pred_by_xgboost_0 <- predict(booster_by_xgboost, newdata = d, ntreelimit = 0)
+  pred_by_xgboost_1 <- predict(booster_by_xgboost, newdata = d, ntreelimit = nrounds)
+  expect_true(all(matrix(pred_by_xgboost_0, byrow=TRUE) == matrix(pred_by_xgboost_1, byrow=TRUE)))
+
+  pred_by_xgboost_2 <- predict(booster_by_xgboost, newdata = d, training = TRUE)
+  expect_false(all(matrix(pred_by_xgboost_0, byrow=TRUE) == matrix(pred_by_xgboost_2, byrow=TRUE)))
+
+  set.seed(1994)
+  dtrain <- xgb.DMatrix(data=d, info = list(label=y))
+  booster_by_train <- xgb.train( params = list(
+    booster = "dart",
+    max_depth = 2,
+    eta = 1,
+    rate_drop = 0.5,
+    one_drop = TRUE,
+    nthread = 1,
+    tree_method= "exact",
+    verbosity = 3,
+    objective = "reg:squarederror"
+  ),
+  data = dtrain,
+  nrounds = nrounds
+  )
+  pred_by_train_0 <- predict(booster_by_train, newdata = dtrain, ntreelimit = 0)
+  pred_by_train_1 <- predict(booster_by_train, newdata = dtrain, ntreelimit = nrounds)
+  pred_by_train_2 <- predict(booster_by_train, newdata = dtrain, training = TRUE)
+
+  expect_true(all(matrix(pred_by_train_0, byrow=TRUE) == matrix(pred_by_xgboost_0, byrow=TRUE)))
+  expect_true(all(matrix(pred_by_train_1, byrow=TRUE) == matrix(pred_by_xgboost_1, byrow=TRUE)))
+  expect_true(all(matrix(pred_by_train_2, byrow=TRUE) == matrix(pred_by_xgboost_2, byrow=TRUE)))
+})
+
 test_that("train and predict softprob", {
   lb <- as.numeric(iris$Species) - 1
   set.seed(11)
@@ -74,7 +122,7 @@ test_that("train and predict softmax", {
   expect_false(is.null(bst$evaluation_log))
   expect_lt(bst$evaluation_log[, min(train_merror)], 0.025)
   expect_equal(bst$niter * 3, xgb.ntree(bst))
-
+
   pred <- predict(bst, as.matrix(iris[, -5]))
   expect_length(pred, nrow(iris))
   err <- sum(pred != lb)/length(lb)
@@ -90,12 +138,12 @@ test_that("train and predict RF", {
                  num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1)
   expect_equal(bst$niter, 1)
   expect_equal(xgb.ntree(bst), 20)
-
+
   pred <- predict(bst, train$data)
   pred_err <- sum((pred > 0.5) != lb)/length(lb)
   expect_lt(abs(bst$evaluation_log[1, train_error] - pred_err), 10e-6)
   #expect_lt(pred_err, 0.03)
-
+
   pred <- predict(bst, train$data, ntreelimit = 20)
   pred_err_20 <- sum((pred > 0.5) != lb)/length(lb)
   expect_equal(pred_err_20, pred_err)
@@ -211,7 +259,7 @@ test_that("train and predict with non-strict classes", {
   bst <- xgboost(data = train_dense, label = train$label, max_depth = 2,
                  eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic", verbose = 0)
   pr0 <- predict(bst, train_dense)
-
+
   # dense matrix-like input of non-matrix class
   class(train_dense) <- 'shmatrix'
   expect_true(is.matrix(train_dense))
@@ -221,7 +269,7 @@ test_that("train and predict with non-strict classes", {
                , regexp = NA)
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)
-
+
   # dense matrix-like input of non-matrix class with some inheritance
   class(train_dense) <- c('pphmatrix','shmatrix')
   expect_true(is.matrix(train_dense))
@@ -231,7 +279,7 @@ test_that("train and predict with non-strict classes", {
                , regexp = NA)
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
   expect_equal(pr0, pr)
-
+
   # when someone inhertis from xgb.Booster, it should still be possible to use it as xgb.Booster
   class(bst) <- c('super.Booster', 'xgb.Booster')
   expect_error(pr <- predict(bst, train_dense), regexp = NA)
diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R
index a71ce4692..09d1e73df 100644
--- a/R-package/tests/testthat/test_helpers.R
+++ b/R-package/tests/testthat/test_helpers.R
@@ -157,7 +157,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
     params = c(
       list(
         booster = booster,
-        objective = "reg:linear",
+        objective = "reg:squarederror",
         eval_metric = "rmse"),
       if (booster == "dart")
         list(rate_drop = .01, one_drop = T)),
diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc
index 07bf17f8e..abd2b9fa8 100644
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -435,9 +435,9 @@ class Dart : public GBTree {
       std::fill(out_preds.begin(), out_preds.end(),
                 model_.learner_model_param_->base_score);
     }
-
-    PredLoopSpecalize(p_fmat, &out_preds, num_group, 0,
-                      ntree_limit, training);
+    const int nthread = omp_get_max_threads();
+    InitThreadTemp(nthread);
+    PredLoopSpecalize(p_fmat, &out_preds, num_group, 0, ntree_limit);
   }
 
   void PredictInstance(const SparsePage::Inst &inst,
@@ -489,11 +489,8 @@ class Dart : public GBTree {
                          std::vector<bst_float>* out_preds,
                          int num_group,
                          unsigned tree_begin,
-                         unsigned tree_end,
-                         bool training) {
-    const int nthread = omp_get_max_threads();
+                         unsigned tree_end) {
     CHECK_EQ(num_group, model_.learner_model_param_->num_output_group);
-    InitThreadTemp(nthread);
     std::vector<bst_float>& preds = *out_preds;
     CHECK_EQ(model_.param.size_leaf_vector, 0)
         << "size_leaf_vector is enforced to 0 so far";
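
Not part of the patch: a minimal R usage sketch of the behaviour the new "dart prediction works" test exercises, assuming the agaricus demo data bundled with the package. With a DART booster, `predict(..., training = TRUE)` (the flag routed through `asInteger(training)` above) applies dropout as it does during boosting, so its output generally differs from the deterministic inference-time prediction.

# Usage sketch only; not part of the patch. Assumes the agaricus demo data
# shipped with the xgboost R package.
library(xgboost)
data(agaricus.train, package = "xgboost")

# A small DART booster; rate_drop > 0 so dropout can actually change predictions.
bst <- xgboost(data = agaricus.train$data, label = agaricus.train$label,
               booster = "dart", rate_drop = 0.5, one_drop = TRUE,
               max_depth = 2, eta = 1, nthread = 2, nrounds = 16,
               objective = "binary:logistic", verbose = 0)

# Inference path: all trees are used, predictions are deterministic.
p_infer <- predict(bst, agaricus.train$data)

# Training path: trees are dropped as during boosting, so the result is
# expected to differ from p_infer (this is what the new test checks).
p_train <- predict(bst, agaricus.train$data, training = TRUE)

identical(p_infer, p_train)  # typically FALSE for DART with dropout enabled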