From 44811f233071c5805d70c287abd22b155b732727 Mon Sep 17 00:00:00 2001
From: Philip Hyunsu Cho
Date: Sun, 5 Aug 2018 10:17:21 -0700
Subject: [PATCH] Fix #3485, #3540: Don't use dropout for predicting test sets
 (#3556)

* Fix #3485, #3540: Don't use dropout for predicting test sets

Dropout (for DART) should only be used at training time.

* Add regression test
---
 include/xgboost/gbm.h             | 3 +++
 src/gbm/gblinear.cc               | 1 +
 src/gbm/gbtree.cc                 | 6 +++++-
 src/learner.cc                    | 8 +++++---
 tests/python/test_basic_models.py | 7 +++++++
 5 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/include/xgboost/gbm.h b/include/xgboost/gbm.h
index cfbea556f..d1292854a 100644
--- a/include/xgboost/gbm.h
+++ b/include/xgboost/gbm.h
@@ -76,11 +76,14 @@ class GradientBooster {
    * \brief generate predictions for given feature matrix
    * \param dmat feature matrix
    * \param out_preds output vector to hold the predictions
+   * \param dropout whether dropout should be applied to prediction
+   *        This option is only meaningful if booster='dart'; otherwise ignored.
    * \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
    *    we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
    */
   virtual void PredictBatch(DMatrix* dmat,
                             HostDeviceVector<bst_float>* out_preds,
+                            bool dropout = true,
                             unsigned ntree_limit = 0) = 0;
   /*!
    * \brief online prediction function, predict score for one instance at a time
diff --git a/src/gbm/gblinear.cc b/src/gbm/gblinear.cc
index ed13bb71c..4454aad47 100644
--- a/src/gbm/gblinear.cc
+++ b/src/gbm/gblinear.cc
@@ -103,6 +103,7 @@ class GBLinear : public GradientBooster {
 
   void PredictBatch(DMatrix *p_fmat,
                     HostDeviceVector<bst_float> *out_preds,
+                    bool dropout,
                     unsigned ntree_limit) override {
     monitor_.Start("PredictBatch");
     CHECK_EQ(ntree_limit, 0U)
diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc
index a619114d8..1e086ec09 100644
--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -217,6 +217,7 @@ class GBTree : public GradientBooster {
 
   void PredictBatch(DMatrix* p_fmat,
                     HostDeviceVector<bst_float>* out_preds,
+                    bool dropout,
                     unsigned ntree_limit) override {
     predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
   }
@@ -356,8 +357,11 @@ class Dart : public GBTree {
   // predict the leaf scores with dropout if ntree_limit = 0
   void PredictBatch(DMatrix* p_fmat,
                     HostDeviceVector<bst_float>* out_preds,
+                    bool dropout,
                     unsigned ntree_limit) override {
-    DropTrees(ntree_limit);
+    if (dropout) {
+      DropTrees(ntree_limit);
+    }
     PredLoopInternal<Dart>(p_fmat, &out_preds->HostVector(), 0, ntree_limit, true);
   }
 
diff --git a/src/learner.cc b/src/learner.cc
index 57c361b6f..98b2f7bb9 100644
--- a/src/learner.cc
+++ b/src/learner.cc
@@ -469,7 +469,7 @@ class LearnerImpl : public Learner {
     } else if (pred_leaf) {
       gbm_->PredictLeaf(data, &out_preds->HostVector(), ntree_limit);
     } else {
-      this->PredictRaw(data, out_preds, ntree_limit);
+      this->PredictRaw(data, out_preds, false, ntree_limit);
       if (!output_margin) {
         obj_->PredTransform(out_preds);
       }
@@ -560,14 +560,16 @@ class LearnerImpl : public Learner {
   /*!
   * \brief get un-transformed prediction
   * \param data training data matrix
   * \param out_preds output vector that stores the prediction
+  * \param dropout whether dropout should be applied to prediction.
+  *        This option is only meaningful if booster='dart'; otherwise ignored.
   * \param ntree_limit limit number of trees used for boosted tree
   *   predictor, when it equals 0, this means we are using all the trees
   */
   inline void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
-                         unsigned ntree_limit = 0) const {
+                         bool dropout = true, unsigned ntree_limit = 0) const {
     CHECK(gbm_ != nullptr)
         << "Predict must happen after Load or InitModel";
-    gbm_->PredictBatch(data, out_preds, ntree_limit);
+    gbm_->PredictBatch(data, out_preds, dropout, ntree_limit);
   }
   // model parameter
diff --git a/tests/python/test_basic_models.py b/tests/python/test_basic_models.py
index 0b12ce4c3..c1fe30983 100644
--- a/tests/python/test_basic_models.py
+++ b/tests/python/test_basic_models.py
@@ -48,6 +48,13 @@ class TestModels(unittest.TestCase):
         preds2 = bst2.predict(dtest2, ntree_limit=num_round)
         # assert they are the same
         assert np.sum(np.abs(preds2 - preds)) == 0
+        # regression test for issues #3485, #3540
+        for _ in range(10):
+            bst3 = xgb.Booster(params=param, model_file='xgb.model.dart')
+            dtest3 = xgb.DMatrix('dtest.buffer')
+            preds3 = bst3.predict(dtest3)
+            # assert they are the same
+            assert np.sum(np.abs(preds3 - preds)) == 0, 'preds3 = {}, preds = {}'.format(preds3, preds)
 
         # check whether sample_type and normalize_type work
         num_round = 50
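
A quick way to see the behavior this patch fixes: before the change, calling predict() on a DART booster applied a fresh random dropout on every call, so repeated predictions on the same test set disagreed with each other (the symptom reported in #3485 and #3540). Below is a minimal standalone repro sketch, not part of the patch; the synthetic data and parameter values are illustrative only.

import numpy as np
import xgboost as xgb

# Illustrative synthetic binary classification problem.
rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = (X[:, 0] + X[:, 1] > 0).astype(int)
dtrain = xgb.DMatrix(X, label=y)

# rate_drop > 0 makes the old dropout-at-predict bug observable.
param = {'booster': 'dart', 'objective': 'binary:logistic', 'rate_drop': 0.5}
bst = xgb.train(param, dtrain, num_boost_round=10)

# With this patch applied, predict() no longer drops trees, so repeated
# calls on the same data must return identical scores.
first = bst.predict(dtrain)
for _ in range(10):
    np.testing.assert_array_equal(first, bst.predict(dtrain))

Note the design choice visible in the diff: PredictRaw keeps dropout = true as its default so the internal training loop, which calls it without the new argument, still applies dropout when computing gradients; only the user-facing Predict path passes false.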