Guard against index error in prediction. (#6982)

* Remove `best_ntree_limit` from documents.
This commit is contained in:
Jiaming Yuan 2021-05-25 23:24:59 +08:00 committed by GitHub
parent c6d87e5e18
commit 86e60e3ba8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 24 additions and 8 deletions

View File

@ -165,9 +165,7 @@ def early_stop(stopping_rounds, maximize=False, verbose=True):
If there's more than one, will use the last. If there's more than one, will use the last.
Returns the model from the last iteration (not the best one). Returns the model from the last iteration (not the best one).
If early stopping occurs, the model will have three additional fields: If early stopping occurs, the model will have two additional fields:
``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``. ``bst.best_score`` and ``bst.best_iteration``.
(Use ``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree``
and/or ``num_class`` appears in the parameters)
Parameters Parameters
---------- ----------

View File

@ -687,7 +687,7 @@ class XGBModel(XGBModelBase):
used for early stopping. used for early stopping.
If early stopping occurs, the model will have three additional fields: If early stopping occurs, the model will have two additional fields:
``clf.best_score``, ``clf.best_iteration`` and ``clf.best_ntree_limit``. ``clf.best_score`` and ``clf.best_iteration``.
verbose : verbose :
If `verbose` and an evaluation set is used, writes the evaluation metric If `verbose` and an evaluation set is used, writes the evaluation metric
measured on the validation set to stderr. measured on the validation set to stderr.

View File

@ -144,10 +144,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
If there's more than one metric in the **eval_metric** parameter given in If there's more than one metric in the **eval_metric** parameter given in
**params**, the last metric will be used for early stopping. **params**, the last metric will be used for early stopping.
If early stopping occurs, the model will have three additional fields: If early stopping occurs, the model will have two additional fields:
``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``. Use ``bst.best_score`` and ``bst.best_iteration``.
``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree`` and/or
``num_class`` appears in the parameters. ``best_ntree_limit`` is the result of
``num_parallel_tree * best_iteration``.
evals_result: dict evals_result: dict
This dictionary stores the evaluation results of all the items in watchlist. This dictionary stores the evaluation results of all the items in watchlist.

View File

@ -501,6 +501,7 @@ void GBTree::PredictBatch(DMatrix* p_fmat,
uint32_t tree_begin, tree_end; uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) = std::tie(tree_begin, tree_end) =
detail::LayerToTree(model_, tparam_, layer_begin, layer_end); detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
if (tree_end > tree_begin) { if (tree_end > tree_begin) {
predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end); predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end);
} }

View File

@ -145,6 +145,7 @@ class TestInplacePredict:
dtrain = xgb.DMatrix(cls.X, cls.y) dtrain = xgb.DMatrix(cls.X, cls.y)
cls.test = xgb.DMatrix(cls.X[:10, ...], missing=cls.missing) cls.test = xgb.DMatrix(cls.X[:10, ...], missing=cls.missing)
cls.num_boost_round = 10
cls.booster = xgb.train({'tree_method': 'hist'}, dtrain, num_boost_round=10) cls.booster = xgb.train({'tree_method': 'hist'}, dtrain, num_boost_round=10)
def test_predict(self): def test_predict(self):
@ -172,6 +173,25 @@ class TestInplacePredict:
np.testing.assert_allclose(predt_from_dmatrix, predt_from_array) np.testing.assert_allclose(predt_from_dmatrix, predt_from_array)
with pytest.raises(ValueError):
booster.predict(test, ntree_limit=booster.best_ntree_limit + 1)
with pytest.raises(ValueError):
booster.predict(test, iteration_range=(0, booster.best_iteration + 2))
default = booster.predict(test)
range_full = booster.predict(test, iteration_range=(0, self.num_boost_round))
ntree_full = booster.predict(test, ntree_limit=self.num_boost_round)
np.testing.assert_allclose(range_full, default)
np.testing.assert_allclose(ntree_full, default)
range_full = booster.predict(
test, iteration_range=(0, booster.best_iteration + 1)
)
ntree_full = booster.predict(test, ntree_limit=booster.best_ntree_limit)
np.testing.assert_allclose(range_full, default)
np.testing.assert_allclose(ntree_full, default)
def predict_dense(x): def predict_dense(x):
inplace_predt = booster.inplace_predict(x) inplace_predt = booster.inplace_predict(x)
d = xgb.DMatrix(x) d = xgb.DMatrix(x)