Use UBJ in Python checkpoint. (#9958)
This commit is contained in:
@@ -62,11 +62,31 @@ class TrainingCallback(ABC):
|
||||
return model
|
||||
|
||||
def before_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:
|
||||
"""Run before each iteration. Return True when training should stop."""
|
||||
"""Run before each iteration. Returns True when training should stop. See
|
||||
:py:meth:`after_iteration` for details.
|
||||
|
||||
"""
|
||||
return False
|
||||
|
||||
def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:
|
||||
"""Run after each iteration. Return True when training should stop."""
|
||||
"""Run after each iteration. Returns `True` when training should stop.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
model :
|
||||
Either a :py:class:`~xgboost.Booster` object or a CVPack if the cv function
|
||||
in xgboost is being used.
|
||||
epoch :
|
||||
The current training iteration.
|
||||
evals_log :
|
||||
A dictionary containing the evaluation history:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
{"data_name": {"metric_name": [0.5, ...]}}
|
||||
|
||||
"""
|
||||
return False
|
||||
|
||||
|
||||
@@ -547,14 +567,16 @@ class TrainingCheckPoint(TrainingCallback):
|
||||
|
||||
.. versionadded:: 1.3.0
|
||||
|
||||
Since XGBoost 2.1.0, the default format is changed to UBJSON.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
directory :
|
||||
Output model directory.
|
||||
name :
|
||||
pattern of output model file. Models will be saved as name_0.json, name_1.json,
|
||||
name_2.json ....
|
||||
pattern of output model file. Models will be saved as name_0.ubj, name_1.ubj,
|
||||
name_2.ubj ....
|
||||
as_pickle :
|
||||
When set to True, all training parameters will be saved in pickle format,
|
||||
instead of saving only the model.
|
||||
@@ -564,6 +586,8 @@ class TrainingCheckPoint(TrainingCallback):
|
||||
|
||||
"""
|
||||
|
||||
default_format = "ubj"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
directory: Union[str, os.PathLike],
|
||||
@@ -592,7 +616,7 @@ class TrainingCheckPoint(TrainingCallback):
|
||||
self._name
|
||||
+ "_"
|
||||
+ (str(epoch + self._start))
|
||||
+ (".pkl" if self._as_pickle else ".json"),
|
||||
+ (".pkl" if self._as_pickle else f".{self.default_format}"),
|
||||
)
|
||||
self._epoch = 0 # reset counter
|
||||
if collective.get_rank() == 0:
|
||||
|
||||
@@ -2591,9 +2591,8 @@ class Booster:
|
||||
|
||||
The model is saved in an XGBoost internal format which is universal among the
|
||||
various XGBoost interfaces. Auxiliary attributes of the Python Booster object
|
||||
(such as feature_names) will not be saved when using binary format. To save
|
||||
those attributes, use JSON/UBJ instead. See :doc:`Model IO
|
||||
</tutorials/saving_model>` for more info.
|
||||
(such as feature_names) are only saved when using JSON or UBJSON (default)
|
||||
format. See :doc:`Model IO </tutorials/saving_model>` for more info.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -2616,12 +2615,15 @@ class Booster:
|
||||
def save_raw(self, raw_format: str = "ubj") -> bytearray:
|
||||
"""Save the model to a in memory buffer representation instead of file.
|
||||
|
||||
The model is saved in an XGBoost internal format which is universal among the
|
||||
various XGBoost interfaces. Auxiliary attributes of the Python Booster object
|
||||
(such as feature_names) are only saved when using JSON or UBJSON (default)
|
||||
format. See :doc:`Model IO </tutorials/saving_model>` for more info.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
raw_format :
|
||||
Format of output buffer. Can be `json`, `ubj` or `deprecated`. Right now
|
||||
the default is `deprecated` but it will be changed to `ubj` (universal binary
|
||||
json) in the future.
|
||||
Format of output buffer. Can be `json`, `ubj` or `deprecated`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -2640,11 +2642,10 @@ class Booster:
|
||||
def load_model(self, fname: ModelIn) -> None:
|
||||
"""Load the model from a file or a bytearray.
|
||||
|
||||
The model is loaded from XGBoost format which is universal among the various
|
||||
XGBoost interfaces. Auxiliary attributes of the Python Booster object (such as
|
||||
feature_names) will not be loaded when using binary format. To save those
|
||||
attributes, use JSON/UBJ instead. See :doc:`Model IO </tutorials/saving_model>`
|
||||
for more info.
|
||||
The model is saved in an XGBoost internal format which is universal among the
|
||||
various XGBoost interfaces. Auxiliary attributes of the Python Booster object
|
||||
(such as feature_names) are only saved when using JSON or UBJSON (default)
|
||||
format. See :doc:`Model IO </tutorials/saving_model>` for more info.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@@ -2769,9 +2770,9 @@ class Booster:
|
||||
with_stats: bool = False,
|
||||
dump_format: str = "text",
|
||||
) -> List[str]:
|
||||
"""Returns the model dump as a list of strings. Unlike :py:meth:`save_model`, the output
|
||||
format is primarily used for visualization or interpretation, hence it's more
|
||||
human readable but cannot be loaded back to XGBoost.
|
||||
"""Returns the model dump as a list of strings. Unlike :py:meth:`save_model`,
|
||||
the output format is primarily used for visualization or interpretation, hence
|
||||
it's more human readable but cannot be loaded back to XGBoost.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
Reference in New Issue
Block a user