Guard against index error in prediction. (#6982)

* Remove `best_ntree_limit` from documents.
2021-05-25 23:24:59 +08:00
parent c6d87e5e18
commit 86e60e3ba8
5 changed files with 24 additions and 8 deletions
--- a/python-package/xgboost/callback.py
+++ b/python-package/xgboost/callback.py
@@ -165,9 +165,7 @@ def early_stop(stopping_rounds, maximize=False, verbose=True):
    If there's more than one, will use the last.
    Returns the model from the last iteration (not the best one).
    If early stopping occurs, the model will have two additional fields:
-    ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
-    (Use ``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree``
-    and/or ``num_class`` appears in the parameters)
+    ``bst.best_score`` and ``bst.best_iteration``.

    Parameters
    ----------
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -687,7 +687,7 @@ class XGBModel(XGBModelBase):
            used for early stopping.

            If early stopping occurs, the model will have two additional fields:
-            ``clf.best_score``, ``clf.best_iteration`` and ``clf.best_ntree_limit``.
+            ``clf.best_score`` and ``clf.best_iteration``.
        verbose :
            If `verbose` and an evaluation set is used, writes the evaluation metric
            measured on the validation set to stderr.
--- a/python-package/xgboost/training.py
+++ b/python-package/xgboost/training.py
@@ -144,10 +144,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
        If there's more than one metric in the **eval_metric** parameter given in
        **params**, the last metric will be used for early stopping.
        If early stopping occurs, the model will have two additional fields:
-        ``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.  Use
-        ``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree`` and/or
-        ``num_class`` appears in the parameters.  ``best_ntree_limit`` is the result of
-        ``num_parallel_tree * best_iteration``.
+        ``bst.best_score`` and ``bst.best_iteration``.
    evals_result: dict
        This dictionary stores the evaluation results of all the items in watchlist.

--- a/src/gbm/gbtree.cc
+++ b/src/gbm/gbtree.cc
@@ -501,6 +501,7 @@ void GBTree::PredictBatch(DMatrix* p_fmat,
  uint32_t tree_begin, tree_end;
  std::tie(tree_begin, tree_end) =
      detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
+  CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
  if (tree_end > tree_begin) {
    predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end);
  }
--- a/tests/python/test_predict.py
+++ b/tests/python/test_predict.py
@@ -145,6 +145,7 @@ class TestInplacePredict:
        dtrain = xgb.DMatrix(cls.X, cls.y)
        cls.test = xgb.DMatrix(cls.X[:10, ...], missing=cls.missing)

+        cls.num_boost_round = 10
        cls.booster = xgb.train({'tree_method': 'hist'}, dtrain, num_boost_round=10)

    def test_predict(self):
@@ -172,6 +173,25 @@ class TestInplacePredict:

        np.testing.assert_allclose(predt_from_dmatrix, predt_from_array)

+        with pytest.raises(ValueError):
+            booster.predict(test, ntree_limit=booster.best_ntree_limit + 1)
+        with pytest.raises(ValueError):
+            booster.predict(test, iteration_range=(0, booster.best_iteration + 2))
+
+        default = booster.predict(test)
+
+        range_full = booster.predict(test, iteration_range=(0, self.num_boost_round))
+        ntree_full = booster.predict(test, ntree_limit=self.num_boost_round)
+        np.testing.assert_allclose(range_full, default)
+        np.testing.assert_allclose(ntree_full, default)
+
+        range_full = booster.predict(
+            test, iteration_range=(0, booster.best_iteration + 1)
+        )
+        ntree_full = booster.predict(test, ntree_limit=booster.best_ntree_limit)
+        np.testing.assert_allclose(range_full, default)
+        np.testing.assert_allclose(ntree_full, default)
+
        def predict_dense(x):
            inplace_predt = booster.inplace_predict(x)
            d = xgb.DMatrix(x)