Fix #3485, #3540: Don't use dropout for predicting test sets (#3556)

* Fix #3485, #3540: Don't use dropout for predicting test sets

Dropout (for DART) should only be used at training time.

* Add regression test
This commit is contained in:
Philip Hyunsu Cho
2018-08-05 10:17:21 -07:00
committed by GitHub
parent 109473dae2
commit 44811f2330
5 changed files with 21 additions and 4 deletions

View File

@@ -469,7 +469,7 @@ class LearnerImpl : public Learner {
} else if (pred_leaf) {
gbm_->PredictLeaf(data, &out_preds->HostVector(), ntree_limit);
} else {
this->PredictRaw(data, out_preds, ntree_limit);
this->PredictRaw(data, out_preds, false, ntree_limit);
if (!output_margin) {
obj_->PredTransform(out_preds);
}
@@ -560,14 +560,16 @@ class LearnerImpl : public Learner {
* \brief get un-transformed prediction
* \param data training data matrix
* \param out_preds output vector that stores the prediction
* \param dropout whether dropout should be applied to prediction.
* This option is only meaningful if booster='dart'; otherwise ignored.
* \param ntree_limit limit on the number of trees used by the boosted tree
* predictor; when it equals 0, all the trees are used
*/
inline void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0) const {
bool dropout = true, unsigned ntree_limit = 0) const {
CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or InitModel";
gbm_->PredictBatch(data, out_preds, ntree_limit);
gbm_->PredictBatch(data, out_preds, dropout, ntree_limit);
}
// model parameter