Revert "Fix #3485, #3540: Don't use dropout for predicting test sets" (#3563)

* Revert "Fix #3485, #3540: Don't use dropout for predicting test sets (#3556)"

This reverts commit 44811f233071c5805d70c287abd22b155b732727.

* Document behavior of predict() for DART booster

* Add notice to parameter.rst
This commit is contained in:
Philip Hyunsu Cho 2018-08-08 09:48:55 -07:00 committed by GitHub
parent e3e776bd58
commit 3c72654e3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 61 additions and 30 deletions

View File

@ -12,6 +12,10 @@ Before running XGBoost, we must set three types of parameters: general parameter
In R-package, you can use ``.`` (dot) to replace underscore in the parameters, for example, you can use ``max.depth`` to indicate ``max_depth``. The underscore parameters are also valid in R.
.. contents::
:backlinks: none
:local:
******************
General Parameters
******************
@ -172,6 +176,18 @@ Parameters for Tree Booster
Additional parameters for Dart Booster (``booster=dart``)
=========================================================
.. note:: Using ``predict()`` with DART booster
If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
some of the trees will be evaluated. This will produce incorrect results if ``data`` is
not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
a nonzero value, e.g.
.. code-block:: python
preds = bst.predict(dtest, ntree_limit=num_round)
* ``sample_type`` [default= ``uniform``]
- Type of sampling algorithm.
@ -212,7 +228,7 @@ Additional parameters for Dart Booster (``booster=dart``)
- range: [0.0, 1.0]
Parameters for Linear Booster (``booster=gblinear``)
==================================================
====================================================
* ``lambda`` [default=0, alias: ``reg_lambda``]
- L2 regularization term on weights. Increasing this value will make model more conservative. Normalised to number of training examples.

View File

@ -111,3 +111,9 @@ Sample Script
# make prediction
# ntree_limit must not be 0
preds = bst.predict(dtest, ntree_limit=num_round)
.. note:: Specify ``ntree_limit`` when predicting with test sets
By default, ``bst.predict()`` will perform dropouts on trees. To obtain
correct results on test sets, disable dropouts by specifying
a nonzero value for ``ntree_limit``.

View File

@ -76,14 +76,11 @@ class GradientBooster {
* \brief generate predictions for given feature matrix
* \param dmat feature matrix
* \param out_preds output vector to hold the predictions
* \param dropout whether dropout should be applied to prediction
* This option is only meaningful if booster='dart'; otherwise ignored.
* \param ntree_limit limit the number of trees used in prediction, when it equals 0, this means
* we do not limit number of trees, this parameter is only valid for gbtree, but not for gblinear
*/
virtual void PredictBatch(DMatrix* dmat,
HostDeviceVector<bst_float>* out_preds,
bool dropout = true,
unsigned ntree_limit = 0) = 0;
/*!
* \brief online prediction function, predict score for one instance at a time

View File

@ -1120,10 +1120,22 @@ class Booster(object):
"""
Predict with data.
NOTE: This function is not thread safe.
For each booster object, predict can only be called from one thread.
If you want to run prediction using multiple threads, call bst.copy() to make copies
of the model object and then call predict
.. note:: This function is not thread safe.
For each booster object, predict can only be called from one thread.
If you want to run prediction using multiple threads, call ``bst.copy()`` to make copies
of the model object and then call ``predict()``.
.. note:: Using ``predict()`` with DART booster
If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
some of the trees will be evaluated. This will produce incorrect results if ``data`` is
not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
a nonzero value, e.g.
.. code-block:: python
preds = bst.predict(dtest, ntree_limit=num_round)
Parameters
----------

View File

@ -563,10 +563,24 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
def predict(self, data, output_margin=False, ntree_limit=None):
"""
Predict with `data`.
NOTE: This function is not thread safe.
For each booster object, predict can only be called from one thread.
If you want to run prediction using multiple threads, call xgb.copy() to make copies
of the model object and then call predict
.. note:: This function is not thread safe.
For each booster object, predict can only be called from one thread.
If you want to run prediction using multiple threads, call ``xgb.copy()`` to make copies
of the model object and then call ``predict()``.
.. note:: Using ``predict()`` with DART booster
If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
some of the trees will be evaluated. This will produce incorrect results if ``data`` is
not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
a nonzero value, e.g.
.. code-block:: python
preds = bst.predict(dtest, ntree_limit=num_round)
Parameters
----------
data : DMatrix

View File

@ -103,7 +103,6 @@ class GBLinear : public GradientBooster {
void PredictBatch(DMatrix *p_fmat,
HostDeviceVector<bst_float> *out_preds,
bool dropout,
unsigned ntree_limit) override {
monitor_.Start("PredictBatch");
CHECK_EQ(ntree_limit, 0U)

View File

@ -217,7 +217,6 @@ class GBTree : public GradientBooster {
void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds,
bool dropout,
unsigned ntree_limit) override {
predictor_->PredictBatch(p_fmat, out_preds, model_, 0, ntree_limit);
}
@ -357,11 +356,8 @@ class Dart : public GBTree {
// predict the leaf scores with dropout if ntree_limit = 0
void PredictBatch(DMatrix* p_fmat,
HostDeviceVector<bst_float>* out_preds,
bool dropout,
unsigned ntree_limit) override {
if (dropout) {
DropTrees(ntree_limit);
}
DropTrees(ntree_limit);
PredLoopInternal<Dart>(p_fmat, &out_preds->HostVector(), 0, ntree_limit, true);
}

View File

@ -469,7 +469,7 @@ class LearnerImpl : public Learner {
} else if (pred_leaf) {
gbm_->PredictLeaf(data, &out_preds->HostVector(), ntree_limit);
} else {
this->PredictRaw(data, out_preds, false, ntree_limit);
this->PredictRaw(data, out_preds, ntree_limit);
if (!output_margin) {
obj_->PredTransform(out_preds);
}
@ -560,16 +560,14 @@ class LearnerImpl : public Learner {
* \brief get un-transformed prediction
* \param data training data matrix
* \param out_preds output vector that stores the prediction
* \param dropout whether dropout should be applied to prediction.
* This option is only meaningful if booster='dart'; otherwise ignored.
* \param ntree_limit limit number of trees used for boosted tree
* predictor, when it equals 0, this means we are using all the trees
*/
inline void PredictRaw(DMatrix* data, HostDeviceVector<bst_float>* out_preds,
bool dropout = true, unsigned ntree_limit = 0) const {
unsigned ntree_limit = 0) const {
CHECK(gbm_ != nullptr)
<< "Predict must happen after Load or InitModel";
gbm_->PredictBatch(data, out_preds, dropout, ntree_limit);
gbm_->PredictBatch(data, out_preds, ntree_limit);
}
// model parameter

View File

@ -48,13 +48,6 @@ class TestModels(unittest.TestCase):
preds2 = bst2.predict(dtest2, ntree_limit=num_round)
# assert they are the same
assert np.sum(np.abs(preds2 - preds)) == 0
# regression test for issues #3485, #3540
for _ in range(10):
bst3 = xgb.Booster(params=param, model_file='xgb.model.dart')
dtest3 = xgb.DMatrix('dtest.buffer')
preds3 = bst3.predict(dtest3)
# assert they are the same
assert np.sum(np.abs(preds3 - preds)) == 0, 'preds3 = {}, preds = {}'.format(preds3, preds)
# check whether sample_type and normalize_type work
num_round = 50