Guard against index error in prediction. (#6982)

* Remove `best_ntree_limit` from documents.
This commit is contained in:
Jiaming Yuan 2021-05-25 23:24:59 +08:00 committed by GitHub
parent c6d87e5e18
commit 86e60e3ba8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 24 additions and 8 deletions

View File

@ -165,9 +165,7 @@ def early_stop(stopping_rounds, maximize=False, verbose=True):
If there's more than one, will use the last.
Returns the model from the last iteration (not the best one).
If early stopping occurs, the model will have the following additional fields:
``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``.
(Use ``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree``
and/or ``num_class`` appears in the parameters)
``bst.best_score`` and ``bst.best_iteration``.
Parameters
----------

View File

@ -687,7 +687,7 @@ class XGBModel(XGBModelBase):
used for early stopping.
If early stopping occurs, the model will have the following additional fields:
``clf.best_score``, ``clf.best_iteration`` and ``clf.best_ntree_limit``.
``clf.best_score`` and ``clf.best_iteration``.
verbose :
If `verbose` and an evaluation set is used, writes the evaluation metric
measured on the validation set to stderr.

View File

@ -144,10 +144,7 @@ def train(params, dtrain, num_boost_round=10, evals=(), obj=None, feval=None,
If there's more than one metric in the **eval_metric** parameter given in
**params**, the last metric will be used for early stopping.
If early stopping occurs, the model will have the following additional fields:
``bst.best_score``, ``bst.best_iteration`` and ``bst.best_ntree_limit``. Use
``bst.best_ntree_limit`` to get the correct value if ``num_parallel_tree`` and/or
``num_class`` appears in the parameters. ``best_ntree_limit`` is the result of
``num_parallel_tree * best_iteration``.
``bst.best_score`` and ``bst.best_iteration``.
evals_result: dict
This dictionary stores the evaluation results of all the items in watchlist.

View File

@ -501,6 +501,7 @@ void GBTree::PredictBatch(DMatrix* p_fmat,
uint32_t tree_begin, tree_end;
std::tie(tree_begin, tree_end) =
detail::LayerToTree(model_, tparam_, layer_begin, layer_end);
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
if (tree_end > tree_begin) {
predictor->PredictBatch(p_fmat, out_preds, model_, tree_begin, tree_end);
}

View File

@ -145,6 +145,7 @@ class TestInplacePredict:
dtrain = xgb.DMatrix(cls.X, cls.y)
cls.test = xgb.DMatrix(cls.X[:10, ...], missing=cls.missing)
cls.num_boost_round = 10
cls.booster = xgb.train({'tree_method': 'hist'}, dtrain, num_boost_round=10)
def test_predict(self):
@ -172,6 +173,25 @@ class TestInplacePredict:
np.testing.assert_allclose(predt_from_dmatrix, predt_from_array)
with pytest.raises(ValueError):
booster.predict(test, ntree_limit=booster.best_ntree_limit + 1)
with pytest.raises(ValueError):
booster.predict(test, iteration_range=(0, booster.best_iteration + 2))
default = booster.predict(test)
range_full = booster.predict(test, iteration_range=(0, self.num_boost_round))
ntree_full = booster.predict(test, ntree_limit=self.num_boost_round)
np.testing.assert_allclose(range_full, default)
np.testing.assert_allclose(ntree_full, default)
range_full = booster.predict(
test, iteration_range=(0, booster.best_iteration + 1)
)
ntree_full = booster.predict(test, ntree_limit=booster.best_ntree_limit)
np.testing.assert_allclose(range_full, default)
np.testing.assert_allclose(ntree_full, default)
def predict_dense(x):
inplace_predt = booster.inplace_predict(x)
d = xgb.DMatrix(x)