Fix #3485, #3540: Don't use dropout for predicting test sets (#3556)

* Fix #3485, #3540: Don't use dropout for predicting test sets

Dropout (for DART) should only be used at training time.

* Add regression test
This commit is contained in:
Philip Hyunsu Cho
2018-08-05 10:17:21 -07:00
committed by GitHub
parent 109473dae2
commit 44811f2330
5 changed files with 21 additions and 4 deletions

View File

@@ -103,6 +103,7 @@ class GBLinear : public GradientBooster {
void PredictBatch(DMatrix *p_fmat,
HostDeviceVector<bst_float> *out_preds,
bool dropout,
unsigned ntree_limit) override {
monitor_.Start("PredictBatch");
CHECK_EQ(ntree_limit, 0U)

View File

@@ -217,6 +217,7 @@ class GBTree : public GradientBooster {
void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds,
bool dropout,
unsigned ntree_limit) override {
predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
}
@@ -356,8 +357,11 @@ class Dart : public GBTree {
// predict the leaf scores, applying dropout only if dropout is true and ntree_limit = 0
void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds,
bool dropout,
unsigned ntree_limit) override {
DropTrees(ntree_limit);
if (dropout) {
DropTrees(ntree_limit);
}
PredLoopInternal<Dart>(p_fmat, &out_preds->HostVector(), 0, ntree_limit, true);
}

View File

@@ -469,7 +469,7 @@ class LearnerImpl : public Learner {
} else if (pred_leaf) {
gbm_->PredictLeaf(data, &out_preds->HostVector(), ntree_limit);
} else {
this->PredictRaw(data, out_preds, ntree_limit);
this->PredictRaw(data, out_preds, false, ntree_limit);
if (!output_margin) {
obj_->PredTransform(out_preds);
}
@@ -560,14 +560,16 @@ class LearnerImpl : public Learner {
* \brief get un-transformed prediction
* \param data training data matrix
* \param out_preds output vector that stores the prediction
* \param dropout whether dropout should be applied to prediction.
* This option is only meaningful if booster='dart'; otherwise ignored.
* \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees
*/
inline void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
unsigned ntree_limit = 0) const {
bool dropout = true, unsigned ntree_limit = 0) const {
CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or InitModel";
gbm_->PredictBatch(data, out_preds, ntree_limit);
gbm_->PredictBatch(data, out_preds, dropout, ntree_limit);
}
// model parameter