Use UBJ in Python checkpoint. (#9958)

This commit is contained in:
Jiaming Yuan
2024-01-09 03:22:15 +08:00
committed by GitHub
parent fa5e2f6c45
commit b3eb5d0945
7 changed files with 104 additions and 46 deletions

View File

@@ -62,11 +62,31 @@ class TrainingCallback(ABC):
return model
def before_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:
"""Run before each iteration. Return True when training should stop."""
"""Run before each iteration. Returns True when training should stop. See
:py:meth:`after_iteration` for details.
"""
return False
def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:
"""Run after each iteration. Return True when training should stop."""
"""Run after each iteration. Returns `True` when training should stop.
Parameters
----------
model :
Either a :py:class:`~xgboost.Booster` object or a CVPack if the cv function
in xgboost is being used.
epoch :
The current training iteration.
evals_log :
A dictionary containing the evaluation history:
.. code-block:: python
{"data_name": {"metric_name": [0.5, ...]}}
"""
return False
@@ -547,14 +567,16 @@ class TrainingCheckPoint(TrainingCallback):
.. versionadded:: 1.3.0
Since XGBoost 2.1.0, the default format is changed to UBJSON.
Parameters
----------
directory :
Output model directory.
name :
pattern of output model file. Models will be saved as name_0.json, name_1.json,
name_2.json ....
pattern of output model file. Models will be saved as name_0.ubj, name_1.ubj,
name_2.ubj ....
as_pickle :
When set to True, all training parameters will be saved in pickle format,
instead of saving only the model.
@@ -564,6 +586,8 @@ class TrainingCheckPoint(TrainingCallback):
"""
default_format = "ubj"
def __init__(
self,
directory: Union[str, os.PathLike],
@@ -592,7 +616,7 @@ class TrainingCheckPoint(TrainingCallback):
self._name
+ "_"
+ (str(epoch + self._start))
+ (".pkl" if self._as_pickle else ".json"),
+ (".pkl" if self._as_pickle else f".{self.default_format}"),
)
self._epoch = 0 # reset counter
if collective.get_rank() == 0:

View File

@@ -2591,9 +2591,8 @@ class Booster:
The model is saved in an XGBoost internal format which is universal among the
various XGBoost interfaces. Auxiliary attributes of the Python Booster object
(such as feature_names) will not be saved when using binary format. To save
those attributes, use JSON/UBJ instead. See :doc:`Model IO
</tutorials/saving_model>` for more info.
(such as feature_names) are only saved when using JSON or UBJSON (default)
format. See :doc:`Model IO </tutorials/saving_model>` for more info.
.. code-block:: python
@@ -2616,12 +2615,15 @@ class Booster:
def save_raw(self, raw_format: str = "ubj") -> bytearray:
"""Save the model to a in memory buffer representation instead of file.
The model is saved in an XGBoost internal format which is universal among the
various XGBoost interfaces. Auxiliary attributes of the Python Booster object
(such as feature_names) are only saved when using JSON or UBJSON (default)
format. See :doc:`Model IO </tutorials/saving_model>` for more info.
Parameters
----------
raw_format :
Format of output buffer. Can be `json`, `ubj` or `deprecated`. Right now
the default is `deprecated` but it will be changed to `ubj` (universal binary
json) in the future.
Format of output buffer. Can be `json`, `ubj` or `deprecated`.
Returns
-------
@@ -2640,11 +2642,10 @@ class Booster:
def load_model(self, fname: ModelIn) -> None:
"""Load the model from a file or a bytearray.
The model is loaded from XGBoost format which is universal among the various
XGBoost interfaces. Auxiliary attributes of the Python Booster object (such as
feature_names) will not be loaded when using binary format. To save those
attributes, use JSON/UBJ instead. See :doc:`Model IO </tutorials/saving_model>`
for more info.
The model is loaded from an XGBoost internal format which is universal among the
various XGBoost interfaces. Auxiliary attributes of the Python Booster object
(such as feature_names) are only saved when using JSON or UBJSON (default)
format. See :doc:`Model IO </tutorials/saving_model>` for more info.
.. code-block:: python
@@ -2769,9 +2770,9 @@ class Booster:
with_stats: bool = False,
dump_format: str = "text",
) -> List[str]:
"""Returns the model dump as a list of strings. Unlike :py:meth:`save_model`, the output
format is primarily used for visualization or interpretation, hence it's more
human readable but cannot be loaded back to XGBoost.
"""Returns the model dump as a list of strings. Unlike :py:meth:`save_model`,
the output format is primarily used for visualization or interpretation, hence
it's more human readable but cannot be loaded back to XGBoost.
Parameters
----------