[Breaking] Don't drop trees during DART prediction by default (#5115)

* Simplify DropTrees calling logic

* Add `training` parameter for prediction method.

* [Breaking]: Add `training` to C API.

* Change for R and Python custom objective.

* Correct comment.

Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
Co-authored-by: Jiaming Yuan <jm.yuan@outlook.com>
This commit is contained in:
Kodi Arfer
2020-01-13 08:48:30 -05:00
committed by Jiaming Yuan
parent 7b65698187
commit f100b8d878
23 changed files with 214 additions and 140 deletions

View File

@@ -145,7 +145,7 @@ xgb.iter.update <- function(booster_handle, dtrain, iter, obj = NULL) {
if (is.null(obj)) {
.Call(XGBoosterUpdateOneIter_R, booster_handle, as.integer(iter), dtrain)
} else {
pred <- predict(booster_handle, dtrain)
pred <- predict(booster_handle, dtrain, training = TRUE)
gpair <- obj(pred, dtrain)
.Call(XGBoosterBoostOneIter_R, booster_handle, dtrain, gpair$grad, gpair$hess)
}

View File

@@ -288,7 +288,7 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) {
#' @export
predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FALSE, ntreelimit = NULL,
predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE,
reshape = FALSE, ...) {
reshape = FALSE, training = FALSE, ...) {
object <- xgb.Booster.complete(object, saveraw = FALSE)
if (!inherits(newdata, "xgb.DMatrix"))
@@ -307,7 +307,8 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA
option <- 0L + 1L * as.logical(outputmargin) + 2L * as.logical(predleaf) + 4L * as.logical(predcontrib) +
8L * as.logical(approxcontrib) + 16L * as.logical(predinteraction)
ret <- .Call(XGBoosterPredict_R, object$handle, newdata, option[1], as.integer(ntreelimit))
ret <- .Call(XGBoosterPredict_R, object$handle, newdata, option[1],
as.integer(ntreelimit), as.integer(training))
n_ret <- length(ret)
n_row <- nrow(newdata)

View File

@@ -24,7 +24,7 @@ extern SEXP XGBoosterGetAttr_R(SEXP, SEXP);
extern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP);
extern SEXP XGBoosterLoadModel_R(SEXP, SEXP);
extern SEXP XGBoosterModelToRaw_R(SEXP);
extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP);
extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP, SEXP);
extern SEXP XGBoosterSaveModel_R(SEXP, SEXP);
extern SEXP XGBoosterSetAttr_R(SEXP, SEXP, SEXP);
extern SEXP XGBoosterSetParam_R(SEXP, SEXP, SEXP);
@@ -50,7 +50,7 @@ static const R_CallMethodDef CallEntries[] = {
{"XGBoosterLoadModelFromRaw_R", (DL_FUNC) &XGBoosterLoadModelFromRaw_R, 2},
{"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2},
{"XGBoosterModelToRaw_R", (DL_FUNC) &XGBoosterModelToRaw_R, 1},
{"XGBoosterPredict_R", (DL_FUNC) &XGBoosterPredict_R, 4},
{"XGBoosterPredict_R", (DL_FUNC) &XGBoosterPredict_R, 5},
{"XGBoosterSaveModel_R", (DL_FUNC) &XGBoosterSaveModel_R, 2},
{"XGBoosterSetAttr_R", (DL_FUNC) &XGBoosterSetAttr_R, 3},
{"XGBoosterSetParam_R", (DL_FUNC) &XGBoosterSetParam_R, 3},

View File

@@ -295,24 +295,26 @@ SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evnames) {
vec_sptr.push_back(vec_names[i].c_str());
}
CHECK_CALL(XGBoosterEvalOneIter(R_ExternalPtrAddr(handle),
asInteger(iter),
BeginPtr(vec_dmats),
BeginPtr(vec_sptr),
len, &ret));
asInteger(iter),
BeginPtr(vec_dmats),
BeginPtr(vec_sptr),
len, &ret));
R_API_END();
return mkString(ret);
}
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit) {
SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask,
SEXP ntree_limit, SEXP training) {
SEXP ret;
R_API_BEGIN();
bst_ulong olen;
const float *res;
CHECK_CALL(XGBoosterPredict(R_ExternalPtrAddr(handle),
R_ExternalPtrAddr(dmat),
asInteger(option_mask),
asInteger(ntree_limit),
&olen, &res));
R_ExternalPtrAddr(dmat),
asInteger(option_mask),
asInteger(ntree_limit),
0,
&olen, &res));
ret = PROTECT(allocVector(REALSXP, olen));
for (size_t i = 0; i < olen; ++i) {
REAL(ret)[i] = res[i];

View File

@@ -148,8 +148,10 @@ XGB_DLL SEXP XGBoosterEvalOneIter_R(SEXP handle, SEXP iter, SEXP dmats, SEXP evn
* \param dmat data matrix
* \param option_mask output_margin:1 predict_leaf:2
* \param ntree_limit limit number of trees used in prediction
* \param training Whether the prediction value is used for training.
*/
XGB_DLL SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask, SEXP ntree_limit);
XGB_DLL SEXP XGBoosterPredict_R(SEXP handle, SEXP dmat, SEXP option_mask,
SEXP ntree_limit, SEXP training);
/*!
* \brief load model from existing file
* \param handle handle

View File

@@ -166,7 +166,7 @@ test_that("SHAPs sum to predictions, with or without DART", {
nrounds = nrounds)
pr <- function(...)
predict(fit, newdata = d, ntreelimit = nrounds, ...)
predict(fit, newdata = d, ...)
pred <- pr()
shap <- pr(predcontrib = T)
shapi <- pr(predinteraction = T)