Revert "Fix #3485, #3540: Don't use dropout for predicting test sets" (#3563)

* Revert "Fix #3485, #3540: Don't use dropout for predicting test sets (#3556)" This reverts commit 44811f2330. * Document behavior of predict() for DART booster * Add notice to parameter.rst
2018-08-08 09:48:55 -07:00
parent e3e776bd58
commit 3c72654e3b
9 changed files with 61 additions and 30 deletions
--- a/python-package/xgboost/core.py
+++ b/python-package/xgboost/core.py
@@ -1120,10 +1120,22 @@ class Booster(object):
        """
        Predict with data.

-        NOTE: This function is not thread safe.
-              For each booster object, predict can only be called from one thread.
-              If you want to run prediction using multiple thread, call bst.copy() to make copies
-              of model object and then call predict
+        .. note:: This function is not thread safe.
+
+          For each booster object, predict can only be called from one thread.
+          If you want to run prediction using multiple thread, call ``bst.copy()`` to make copies
+          of model object and then call ``predict()``.
+
+        .. note:: Using ``predict()`` with DART booster
+
+          If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
+          some of the trees will be evaluated. This will produce incorrect results if ``data`` is
+          not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
+          a nonzero value, e.g.
+
+          .. code-block:: python
+
+            preds = bst.predict(dtest, ntree_limit=num_round)

        Parameters
        ----------
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -563,10 +563,24 @@ class XGBClassifier(XGBModel, XGBClassifierBase):
    def predict(self, data, output_margin=False, ntree_limit=None):
        """
        Predict with `data`.
-        NOTE: This function is not thread safe.
-              For each booster object, predict can only be called from one thread.
-              If you want to run prediction using multiple thread, call xgb.copy() to make copies
-              of model object and then call predict
+
+        .. note:: This function is not thread safe.
+
+          For each booster object, predict can only be called from one thread.
+          If you want to run prediction using multiple thread, call ``xgb.copy()`` to make copies
+          of model object and then call ``predict()``.
+
+        .. note:: Using ``predict()`` with DART booster
+
+          If the booster object is DART type, ``predict()`` will perform dropouts, i.e. only
+          some of the trees will be evaluated. This will produce incorrect results if ``data`` is
+          not the training data. To obtain correct results on test sets, set ``ntree_limit`` to
+          a nonzero value, e.g.
+
+          .. code-block:: python
+
+            preds = bst.predict(dtest, ntree_limit=num_round)
+
        Parameters
        ----------
        data : DMatrix