Define best_iteration only if early stopping is used. (#9403)
* Define `best_iteration` only if early stopping is used. This is the behavior specified by the documentation but not honored by the actual code. - Don't set the attributes if there's no early stopping. - Clean up the code for callbacks, and replace assertions with proper exceptions. - Assign the attributes when early stopping `save_best` is used. - Turn the attributes into Python properties. --------- Co-authored-by: Philip Hyunsu Cho <chohyu01@cs.washington.edu>
This commit is contained in:
@@ -134,13 +134,17 @@ class CallbackContainer:
|
||||
is_cv: bool = False,
|
||||
) -> None:
|
||||
self.callbacks = set(callbacks)
|
||||
if metric is not None:
|
||||
msg = (
|
||||
"metric must be callable object for monitoring. For "
|
||||
+ "builtin metrics, passing them in training parameter"
|
||||
+ " will invoke monitor automatically."
|
||||
)
|
||||
assert callable(metric), msg
|
||||
for cb in callbacks:
|
||||
if not isinstance(cb, TrainingCallback):
|
||||
raise TypeError("callback must be an instance of `TrainingCallback`.")
|
||||
|
||||
msg = (
|
||||
"metric must be callable object for monitoring. For builtin metrics"
|
||||
", passing them in training parameter invokes monitor automatically."
|
||||
)
|
||||
if metric is not None and not callable(metric):
|
||||
raise TypeError(msg)
|
||||
|
||||
self.metric = metric
|
||||
self.history: TrainingCallback.EvalsLog = collections.OrderedDict()
|
||||
self._output_margin = output_margin
|
||||
@@ -170,16 +174,6 @@ class CallbackContainer:
|
||||
else:
|
||||
assert isinstance(model, Booster), msg
|
||||
|
||||
if not self.is_cv:
|
||||
if model.attr("best_score") is not None:
|
||||
model.best_score = float(cast(str, model.attr("best_score")))
|
||||
model.best_iteration = int(cast(str, model.attr("best_iteration")))
|
||||
else:
|
||||
# Due to compatibility with version older than 1.4, these attributes are
|
||||
# added to Python object even if early stopping is not used.
|
||||
model.best_iteration = model.num_boosted_rounds() - 1
|
||||
model.set_attr(best_iteration=str(model.best_iteration))
|
||||
|
||||
return model
|
||||
|
||||
def before_iteration(
|
||||
@@ -267,9 +261,14 @@ class LearningRateScheduler(TrainingCallback):
|
||||
def __init__(
|
||||
self, learning_rates: Union[Callable[[int], float], Sequence[float]]
|
||||
) -> None:
|
||||
assert callable(learning_rates) or isinstance(
|
||||
if not callable(learning_rates) and not isinstance(
|
||||
learning_rates, collections.abc.Sequence
|
||||
)
|
||||
):
|
||||
raise TypeError(
|
||||
"Invalid learning rates, expecting callable or sequence, got: "
|
||||
f"{type(learning_rates)}"
|
||||
)
|
||||
|
||||
if callable(learning_rates):
|
||||
self.learning_rates = learning_rates
|
||||
else:
|
||||
@@ -302,24 +301,28 @@ class EarlyStopping(TrainingCallback):
|
||||
save_best :
|
||||
Whether training should return the best model or the last model.
|
||||
min_delta :
|
||||
Minimum absolute change in score to be qualified as an improvement.
|
||||
|
||||
.. versionadded:: 1.5.0
|
||||
|
||||
.. code-block:: python
|
||||
Minimum absolute change in score to be qualified as an improvement.
|
||||
|
||||
es = xgboost.callback.EarlyStopping(
|
||||
rounds=2,
|
||||
min_delta=1e-3,
|
||||
save_best=True,
|
||||
maximize=False,
|
||||
data_name="validation_0",
|
||||
metric_name="mlogloss",
|
||||
)
|
||||
clf = xgboost.XGBClassifier(tree_method="gpu_hist", callbacks=[es])
|
||||
Examples
|
||||
--------
|
||||
|
||||
X, y = load_digits(return_X_y=True)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
.. code-block:: python
|
||||
|
||||
es = xgboost.callback.EarlyStopping(
|
||||
rounds=2,
|
||||
min_delta=1e-3,
|
||||
save_best=True,
|
||||
maximize=False,
|
||||
data_name="validation_0",
|
||||
metric_name="mlogloss",
|
||||
)
|
||||
clf = xgboost.XGBClassifier(tree_method="hist", device="cuda", callbacks=[es])
|
||||
|
||||
X, y = load_digits(return_X_y=True)
|
||||
clf.fit(X, y, eval_set=[(X, y)])
|
||||
"""
|
||||
|
||||
# pylint: disable=too-many-arguments
|
||||
@@ -363,7 +366,7 @@ class EarlyStopping(TrainingCallback):
|
||||
return numpy.greater(get_s(new) - self._min_delta, get_s(best))
|
||||
|
||||
def minimize(new: _Score, best: _Score) -> bool:
|
||||
"""New score should be smaller than the old one."""
|
||||
"""New score should be lesser than the old one."""
|
||||
return numpy.greater(get_s(best) - self._min_delta, get_s(new))
|
||||
|
||||
if self.maximize is None:
|
||||
@@ -419,38 +422,53 @@ class EarlyStopping(TrainingCallback):
|
||||
) -> bool:
|
||||
epoch += self.starting_round # training continuation
|
||||
msg = "Must have at least 1 validation dataset for early stopping."
|
||||
assert len(evals_log.keys()) >= 1, msg
|
||||
data_name = ""
|
||||
if len(evals_log.keys()) < 1:
|
||||
raise ValueError(msg)
|
||||
|
||||
# Get data name
|
||||
if self.data:
|
||||
for d, _ in evals_log.items():
|
||||
if d == self.data:
|
||||
data_name = d
|
||||
if not data_name:
|
||||
raise ValueError("No dataset named:", self.data)
|
||||
data_name = self.data
|
||||
else:
|
||||
# Use the last one as default.
|
||||
data_name = list(evals_log.keys())[-1]
|
||||
assert isinstance(data_name, str) and data_name
|
||||
if data_name not in evals_log:
|
||||
raise ValueError(f"No dataset named: {data_name}")
|
||||
|
||||
if not isinstance(data_name, str):
|
||||
raise TypeError(
|
||||
f"The name of the dataset should be a string. Got: {type(data_name)}"
|
||||
)
|
||||
data_log = evals_log[data_name]
|
||||
|
||||
# Filter out scores that can not be used for early stopping.
|
||||
# Get metric name
|
||||
if self.metric_name:
|
||||
metric_name = self.metric_name
|
||||
else:
|
||||
# Use last metric by default.
|
||||
assert isinstance(data_log, collections.OrderedDict)
|
||||
metric_name = list(data_log.keys())[-1]
|
||||
if metric_name not in data_log:
|
||||
raise ValueError(f"No metric named: {metric_name}")
|
||||
|
||||
# The latest score
|
||||
score = data_log[metric_name][-1]
|
||||
return self._update_rounds(score, data_name, metric_name, model, epoch)
|
||||
|
||||
def after_training(self, model: _Model) -> _Model:
|
||||
if not self.save_best:
|
||||
return model
|
||||
|
||||
try:
|
||||
if self.save_best:
|
||||
model = model[: int(model.attr("best_iteration")) + 1]
|
||||
best_iteration = model.best_iteration
|
||||
best_score = model.best_score
|
||||
assert best_iteration is not None and best_score is not None
|
||||
model = model[: best_iteration + 1]
|
||||
model.best_iteration = best_iteration
|
||||
model.best_score = best_score
|
||||
except XGBoostError as e:
|
||||
raise XGBoostError(
|
||||
"`save_best` is not applicable to current booster"
|
||||
"`save_best` is not applicable to the current booster"
|
||||
) from e
|
||||
|
||||
return model
|
||||
|
||||
|
||||
@@ -462,8 +480,6 @@ class EvaluationMonitor(TrainingCallback):
|
||||
Parameters
|
||||
----------
|
||||
|
||||
metric :
|
||||
Extra user defined metric.
|
||||
rank :
|
||||
Which worker should be used for printing the result.
|
||||
period :
|
||||
|
||||
@@ -1890,7 +1890,7 @@ class Booster:
|
||||
attr_names = from_cstr_to_pystr(sarr, length)
|
||||
return {n: self.attr(n) for n in attr_names}
|
||||
|
||||
def set_attr(self, **kwargs: Optional[str]) -> None:
|
||||
def set_attr(self, **kwargs: Optional[Any]) -> None:
|
||||
"""Set the attribute of the Booster.
|
||||
|
||||
Parameters
|
||||
@@ -2559,10 +2559,35 @@ class Booster:
|
||||
else:
|
||||
raise TypeError("Unknown file type: ", fname)
|
||||
|
||||
if self.attr("best_iteration") is not None:
|
||||
self.best_iteration = int(cast(int, self.attr("best_iteration")))
|
||||
if self.attr("best_score") is not None:
|
||||
self.best_score = float(cast(float, self.attr("best_score")))
|
||||
@property
|
||||
def best_iteration(self) -> int:
|
||||
"""The best iteration during training."""
|
||||
best = self.attr("best_iteration")
|
||||
if best is not None:
|
||||
return int(best)
|
||||
|
||||
raise AttributeError(
|
||||
"`best_iteration` is only defined when early stopping is used."
|
||||
)
|
||||
|
||||
@best_iteration.setter
|
||||
def best_iteration(self, iteration: int) -> None:
|
||||
self.set_attr(best_iteration=iteration)
|
||||
|
||||
@property
|
||||
def best_score(self) -> float:
|
||||
"""The best evaluation score during training."""
|
||||
best = self.attr("best_score")
|
||||
if best is not None:
|
||||
return float(best)
|
||||
|
||||
raise AttributeError(
|
||||
"`best_score` is only defined when early stopping is used."
|
||||
)
|
||||
|
||||
@best_score.setter
|
||||
def best_score(self, score: int) -> None:
|
||||
self.set_attr(best_score=score)
|
||||
|
||||
def num_boosted_rounds(self) -> int:
|
||||
"""Get number of boosted rounds. For gblinear this is reset to 0 after
|
||||
|
||||
@@ -230,10 +230,10 @@ __model_doc = f"""
|
||||
subsample : Optional[float]
|
||||
Subsample ratio of the training instance.
|
||||
sampling_method :
|
||||
Sampling method. Used only by `gpu_hist` tree method.
|
||||
- `uniform`: select random training instances uniformly.
|
||||
- `gradient_based` select random training instances with higher probability when
|
||||
the gradient and hessian are larger. (cf. CatBoost)
|
||||
Sampling method. Used only by the GPU version of ``hist`` tree method.
|
||||
- ``uniform``: select random training instances uniformly.
|
||||
- ``gradient_based`` select random training instances with higher probability
|
||||
when the gradient and hessian are larger. (cf. CatBoost)
|
||||
colsample_bytree : Optional[float]
|
||||
Subsample ratio of columns when constructing each tree.
|
||||
colsample_bylevel : Optional[float]
|
||||
@@ -992,12 +992,12 @@ class XGBModel(XGBModelBase):
|
||||
X :
|
||||
Feature matrix. See :ref:`py-data` for a list of supported types.
|
||||
|
||||
When the ``tree_method`` is set to ``hist`` or ``gpu_hist``, internally, the
|
||||
When the ``tree_method`` is set to ``hist``, internally, the
|
||||
:py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix`
|
||||
for conserving memory. However, this has performance implications when the
|
||||
device of input data is not matched with algorithm. For instance, if the
|
||||
input is a numpy array on CPU but ``gpu_hist`` is used for training, then
|
||||
the data is first processed on CPU then transferred to GPU.
|
||||
input is a numpy array on CPU but ``cuda`` is used for training, then the
|
||||
data is first processed on CPU then transferred to GPU.
|
||||
y :
|
||||
Labels
|
||||
sample_weight :
|
||||
@@ -1279,19 +1279,10 @@ class XGBModel(XGBModelBase):
|
||||
)
|
||||
return np.array(feature_names)
|
||||
|
||||
def _early_stopping_attr(self, attr: str) -> Union[float, int]:
|
||||
booster = self.get_booster()
|
||||
try:
|
||||
return getattr(booster, attr)
|
||||
except AttributeError as e:
|
||||
raise AttributeError(
|
||||
f"`{attr}` in only defined when early stopping is used."
|
||||
) from e
|
||||
|
||||
@property
|
||||
def best_score(self) -> float:
|
||||
"""The best score obtained by early stopping."""
|
||||
return float(self._early_stopping_attr("best_score"))
|
||||
return self.get_booster().best_score
|
||||
|
||||
@property
|
||||
def best_iteration(self) -> int:
|
||||
@@ -1299,7 +1290,7 @@ class XGBModel(XGBModelBase):
|
||||
for instance if the best iteration is the first round, then best_iteration is 0.
|
||||
|
||||
"""
|
||||
return int(self._early_stopping_attr("best_iteration"))
|
||||
return self.get_booster().best_iteration
|
||||
|
||||
@property
|
||||
def feature_importances_(self) -> np.ndarray:
|
||||
@@ -1926,12 +1917,12 @@ class XGBRanker(XGBModel, XGBRankerMixIn):
|
||||
| 1 | :math:`x_{20}` | :math:`x_{21}` |
|
||||
+-----+----------------+----------------+
|
||||
|
||||
When the ``tree_method`` is set to ``hist`` or ``gpu_hist``, internally, the
|
||||
When the ``tree_method`` is set to ``hist``, internally, the
|
||||
:py:class:`QuantileDMatrix` will be used instead of the :py:class:`DMatrix`
|
||||
for conserving memory. However, this has performance implications when the
|
||||
device of input data is not matched with algorithm. For instance, if the
|
||||
input is a numpy array on CPU but ``gpu_hist`` is used for training, then
|
||||
the data is first processed on CPU then transferred to GPU.
|
||||
input is a numpy array on CPU but ``cuda`` is used for training, then the
|
||||
data is first processed on CPU then transferred to GPU.
|
||||
y :
|
||||
Labels
|
||||
group :
|
||||
|
||||
@@ -28,17 +28,6 @@ from .core import (
|
||||
_CVFolds = Sequence["CVPack"]
|
||||
|
||||
|
||||
def _assert_new_callback(callbacks: Optional[Sequence[TrainingCallback]]) -> None:
|
||||
is_new_callback: bool = not callbacks or all(
|
||||
isinstance(c, TrainingCallback) for c in callbacks
|
||||
)
|
||||
if not is_new_callback:
|
||||
link = "https://xgboost.readthedocs.io/en/latest/python/callbacks.html"
|
||||
raise ValueError(
|
||||
f"Old style callback was removed in version 1.6. See: {link}."
|
||||
)
|
||||
|
||||
|
||||
def _configure_custom_metric(
|
||||
feval: Optional[Metric], custom_metric: Optional[Metric]
|
||||
) -> Optional[Metric]:
|
||||
@@ -170,7 +159,6 @@ def train(
|
||||
bst = Booster(params, [dtrain] + [d[0] for d in evals], model_file=xgb_model)
|
||||
start_iteration = 0
|
||||
|
||||
_assert_new_callback(callbacks)
|
||||
if verbose_eval:
|
||||
verbose_eval = 1 if verbose_eval is True else verbose_eval
|
||||
callbacks.append(EvaluationMonitor(period=verbose_eval))
|
||||
@@ -247,7 +235,7 @@ class _PackedBooster:
|
||||
result = [f.eval(iteration, feval, output_margin) for f in self.cvfolds]
|
||||
return result
|
||||
|
||||
def set_attr(self, **kwargs: Optional[str]) -> Any:
|
||||
def set_attr(self, **kwargs: Optional[Any]) -> Any:
|
||||
"""Iterate through folds for setting attributes"""
|
||||
for f in self.cvfolds:
|
||||
f.bst.set_attr(**kwargs)
|
||||
@@ -274,11 +262,20 @@ class _PackedBooster:
|
||||
"""Get best_iteration"""
|
||||
return int(cast(int, self.cvfolds[0].bst.attr("best_iteration")))
|
||||
|
||||
@best_iteration.setter
|
||||
def best_iteration(self, iteration: int) -> None:
|
||||
"""Get best_iteration"""
|
||||
self.set_attr(best_iteration=iteration)
|
||||
|
||||
@property
|
||||
def best_score(self) -> float:
|
||||
"""Get best_score."""
|
||||
return float(cast(float, self.cvfolds[0].bst.attr("best_score")))
|
||||
|
||||
@best_score.setter
|
||||
def best_score(self, score: float) -> None:
|
||||
self.set_attr(best_score=score)
|
||||
|
||||
|
||||
def groups_to_rows(groups: List[np.ndarray], boundaries: np.ndarray) -> np.ndarray:
|
||||
"""
|
||||
@@ -551,7 +548,6 @@ def cv(
|
||||
|
||||
# setup callbacks
|
||||
callbacks = [] if callbacks is None else copy.copy(list(callbacks))
|
||||
_assert_new_callback(callbacks)
|
||||
|
||||
if verbose_eval:
|
||||
verbose_eval = 1 if verbose_eval is True else verbose_eval
|
||||
|
||||
Reference in New Issue
Block a user