Use UBJ in Python checkpoint. (#9958)

parent fa5e2f6c45
commit b3eb5d0945
demo/guide-python/callbacks.py

@@ -7,6 +7,7 @@ Demo for using and defining callback functions
 import argparse
 import os
 import tempfile
+from typing import Dict
 
 import numpy as np
 from matplotlib import pyplot as plt
@@ -17,24 +18,26 @@ import xgboost as xgb
 
 
 class Plotting(xgb.callback.TrainingCallback):
-    """Plot evaluation result during training. Only for demonstration purpose as it's quite
-    slow to draw.
+    """Plot evaluation result during training. Only for demonstration purpose as it's
+    quite slow to draw using matplotlib.
 
     """
 
-    def __init__(self, rounds):
+    def __init__(self, rounds: int) -> None:
         self.fig = plt.figure()
         self.ax = self.fig.add_subplot(111)
         self.rounds = rounds
-        self.lines = {}
+        self.lines: Dict[str, plt.Line2D] = {}
         self.fig.show()
         self.x = np.linspace(0, self.rounds, self.rounds)
         plt.ion()
 
-    def _get_key(self, data, metric):
+    def _get_key(self, data: str, metric: str) -> str:
         return f"{data}-{metric}"
 
-    def after_iteration(self, model, epoch, evals_log):
+    def after_iteration(
+        self, model: xgb.Booster, epoch: int, evals_log: Dict[str, dict]
+    ) -> bool:
         """Update the plot."""
         if not self.lines:
             for data, metric in evals_log.items():
@@ -55,7 +58,7 @@ class Plotting(xgb.callback.TrainingCallback):
         return False
 
 
-def custom_callback():
+def custom_callback() -> None:
     """Demo for defining a custom callback function that plots evaluation result during
     training."""
     X, y = load_breast_cancer(return_X_y=True)
@@ -82,19 +85,27 @@ def custom_callback():
     )
 
 
-def check_point_callback():
-    # only for demo, set a larger value (like 100) in practice as checkpointing is quite
+def check_point_callback() -> None:
+    """Demo for using the checkpoint callback. Custom logic for handling output is
+    usually required and users are encouraged to define their own callback for
+    checkpointing operations. The builtin one can be used as a starting point.
+
+    """
+    # Only for demo, set a larger value (like 100) in practice as checkpointing is quite
     # slow.
     rounds = 2
 
-    def check(as_pickle):
+    def check(as_pickle: bool) -> None:
         for i in range(0, 10, rounds):
             if i == 0:
                 continue
             if as_pickle:
                 path = os.path.join(tmpdir, "model_" + str(i) + ".pkl")
             else:
-                path = os.path.join(tmpdir, "model_" + str(i) + ".json")
+                path = os.path.join(
+                    tmpdir,
+                    f"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}",
+                )
             assert os.path.exists(path)
 
     X, y = load_breast_cancer(return_X_y=True)
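The new check_point_callback docstring encourages defining your own callback for checkpointing. A minimal sketch of such a callback, assuming a build that includes this change; the SimpleCheckpoint name and its interval logic are illustrative, not part of the commit:

import os

import xgboost as xgb


class SimpleCheckpoint(xgb.callback.TrainingCallback):
    # Illustrative only: save the model as UBJSON every `interval` rounds.
    def __init__(self, directory: str, interval: int = 100) -> None:
        self._dir = directory
        self._interval = interval
        super().__init__()

    def after_iteration(self, model, epoch: int, evals_log) -> bool:
        if epoch > 0 and epoch % self._interval == 0:
            # The ".ubj" extension selects the UBJSON format.
            model.save_model(os.path.join(self._dir, f"model_{epoch}.ubj"))
        return False  # returning False keeps training going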
doc/python/python_api.rst

@@ -88,22 +88,18 @@ Callback API
 
 .. autoclass:: xgboost.callback.EvaluationMonitor
     :members:
-    :inherited-members:
     :show-inheritance:
 
 .. autoclass:: xgboost.callback.EarlyStopping
     :members:
-    :inherited-members:
     :show-inheritance:
 
 .. autoclass:: xgboost.callback.LearningRateScheduler
     :members:
-    :inherited-members:
     :show-inheritance:
 
 .. autoclass:: xgboost.callback.TrainingCheckPoint
     :members:
-    :inherited-members:
     :show-inheritance:
 
 .. _dask_api:
python-package/xgboost/callback.py

@@ -62,11 +62,31 @@ class TrainingCallback(ABC):
         return model
 
     def before_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:
-        """Run before each iteration. Return True when training should stop."""
+        """Run before each iteration. Returns True when training should stop. See
+        :py:meth:`after_iteration` for details.
+
+        """
         return False
 
     def after_iteration(self, model: _Model, epoch: int, evals_log: EvalsLog) -> bool:
-        """Run after each iteration. Return True when training should stop."""
+        """Run after each iteration. Returns `True` when training should stop.
+
+        Parameters
+        ----------
+
+        model :
+            Either a :py:class:`~xgboost.Booster` object or a CVPack if the cv function
+            in xgboost is being used.
+        epoch :
+            The current training iteration.
+        evals_log :
+            A dictionary containing the evaluation history:
+
+            .. code-block:: python
+
+                {"data_name": {"metric_name": [0.5, ...]}}
+
+        """
         return False
 
 
@@ -547,14 +567,16 @@ class TrainingCheckPoint(TrainingCallback):
 
     .. versionadded:: 1.3.0
 
+    Since XGBoost 2.1.0, the default format is changed to UBJSON.
+
     Parameters
     ----------
 
     directory :
         Output model directory.
     name :
-        pattern of output model file. Models will be saved as name_0.json, name_1.json,
-        name_2.json ....
+        pattern of output model file. Models will be saved as name_0.ubj, name_1.ubj,
+        name_2.ubj ....
     as_pickle :
         When set to True, all training parameters will be saved in pickle format,
         instead of saving only the model.
@@ -564,6 +586,8 @@ class TrainingCheckPoint(TrainingCallback):
 
     """
 
+    default_format = "ubj"
+
     def __init__(
         self,
         directory: Union[str, os.PathLike],
@@ -592,7 +616,7 @@ class TrainingCheckPoint(TrainingCallback):
                 self._name
                 + "_"
                 + (str(epoch + self._start))
-                + (".pkl" if self._as_pickle else ".json"),
+                + (".pkl" if self._as_pickle else f".{self.default_format}"),
             )
             self._epoch = 0  # reset counter
             if collective.get_rank() == 0:
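The expanded after_iteration docstring documents the evals_log layout. A hedged sketch of a callback that consumes it; the dataset name, metric name, and threshold are illustrative:

import xgboost as xgb


class StopBelowRMSE(xgb.callback.TrainingCallback):
    # Illustrative only: stop once validation rmse drops below a threshold.
    def __init__(self, threshold: float) -> None:
        self._threshold = threshold
        super().__init__()

    def after_iteration(self, model, epoch: int, evals_log) -> bool:
        # evals_log has the documented shape:
        #     {"data_name": {"metric_name": [0.5, ...]}}
        history = evals_log.get("validation", {}).get("rmse", [])
        # Returning True tells the training loop to stop.
        return bool(history) and history[-1] < self._threshold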
python-package/xgboost/core.py

@@ -2591,9 +2591,8 @@ class Booster:
 
         The model is saved in an XGBoost internal format which is universal among the
         various XGBoost interfaces. Auxiliary attributes of the Python Booster object
-        (such as feature_names) will not be saved when using binary format. To save
-        those attributes, use JSON/UBJ instead. See :doc:`Model IO
-        </tutorials/saving_model>` for more info.
+        (such as feature_names) are only saved when using JSON or UBJSON (default)
+        format. See :doc:`Model IO </tutorials/saving_model>` for more info.
 
         .. code-block:: python
 
@@ -2616,12 +2615,15 @@ class Booster:
     def save_raw(self, raw_format: str = "ubj") -> bytearray:
         """Save the model to an in-memory buffer representation instead of file.
 
+        The model is saved in an XGBoost internal format which is universal among the
+        various XGBoost interfaces. Auxiliary attributes of the Python Booster object
+        (such as feature_names) are only saved when using JSON or UBJSON (default)
+        format. See :doc:`Model IO </tutorials/saving_model>` for more info.
+
         Parameters
         ----------
         raw_format :
-            Format of output buffer. Can be `json`, `ubj` or `deprecated`. Right now
-            the default is `deprecated` but it will be changed to `ubj` (univeral binary
-            json) in the future.
+            Format of output buffer. Can be `json`, `ubj` or `deprecated`.
 
         Returns
         -------
@@ -2640,11 +2642,10 @@ class Booster:
     def load_model(self, fname: ModelIn) -> None:
         """Load the model from a file or a bytearray.
 
-        The model is loaded from XGBoost format which is universal among the various
-        XGBoost interfaces. Auxiliary attributes of the Python Booster object (such as
-        feature_names) will not be loaded when using binary format. To save those
-        attributes, use JSON/UBJ instead. See :doc:`Model IO </tutorials/saving_model>`
-        for more info.
+        The model is saved in an XGBoost internal format which is universal among the
+        various XGBoost interfaces. Auxiliary attributes of the Python Booster object
+        (such as feature_names) are only saved when using JSON or UBJSON (default)
+        format. See :doc:`Model IO </tutorials/saving_model>` for more info.
 
         .. code-block:: python
 
@@ -2769,9 +2770,9 @@ class Booster:
         with_stats: bool = False,
         dump_format: str = "text",
     ) -> List[str]:
-        """Returns the model dump as a list of strings. Unlike :py:meth:`save_model`, the output
-        format is primarily used for visualization or interpretation, hence it's more
-        human readable but cannot be loaded back to XGBoost.
+        """Returns the model dump as a list of strings. Unlike :py:meth:`save_model`,
+        the output format is primarily used for visualization or interpretation, hence
+        it's more human readable but cannot be loaded back to XGBoost.
 
         Parameters
         ----------
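Per the updated save_raw and load_model docstrings, both UBJSON (the default) and JSON preserve auxiliary attributes. A quick round-trip sketch under those assumptions; the data and parameter values are synthetic:

import numpy as np
import xgboost as xgb

X = np.random.rand(100, 4)
y = np.random.rand(100)
booster = xgb.train(
    {"objective": "reg:squarederror"}, xgb.DMatrix(X, y), num_boost_round=5
)

raw = booster.save_raw()       # raw_format defaults to "ubj"
restored = xgb.Booster()
restored.load_model(raw)       # load back from the in-memory buffer

booster.save_model("model.ubj")  # on disk, format inferred from the extension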
tests/ci_build/lint_python.py

@@ -31,6 +31,8 @@ class LintersPaths:
         "tests/python/test_with_pandas.py",
         "tests/python-gpu/",
         "tests/python-sycl/",
+        "tests/test_distributed/test_with_dask/",
+        "tests/test_distributed/test_gpu_with_dask/",
         "tests/test_distributed/test_with_spark/",
         "tests/test_distributed/test_gpu_with_spark/",
         # demo
@@ -91,6 +93,7 @@ class LintersPaths:
         # demo
         "demo/json-model/json_parser.py",
         "demo/guide-python/external_memory.py",
+        "demo/guide-python/callbacks.py",
         "demo/guide-python/cat_in_the_dat.py",
         "demo/guide-python/categorical.py",
         "demo/guide-python/cat_pipeline.py",
tests/python/test_callback.py

@@ -244,7 +244,7 @@ class TestCallbacks:
         assert booster.num_boosted_rounds() == booster.best_iteration + 1
 
         with tempfile.TemporaryDirectory() as tmpdir:
-            path = os.path.join(tmpdir, 'model.json')
+            path = os.path.join(tmpdir, "model.json")
             cls.save_model(path)
             cls = xgb.XGBClassifier()
             cls.load_model(path)
@@ -378,7 +378,7 @@ class TestCallbacks:
         scheduler = xgb.callback.LearningRateScheduler
 
         dtrain, dtest = tm.load_agaricus(__file__)
-        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
+        watchlist = [(dtest, "eval"), (dtrain, "train")]
 
         param = {
             "max_depth": 2,
@@ -429,7 +429,7 @@ class TestCallbacks:
         assert tree_3th_0["split_conditions"] != tree_3th_1["split_conditions"]
 
     @pytest.mark.parametrize("tree_method", ["hist", "approx", "approx"])
-    def test_eta_decay(self, tree_method):
+    def test_eta_decay(self, tree_method: str) -> None:
         self.run_eta_decay(tree_method)
 
     @pytest.mark.parametrize(
@@ -446,7 +446,7 @@ class TestCallbacks:
     def test_eta_decay_leaf_output(self, tree_method: str, objective: str) -> None:
         self.run_eta_decay_leaf_output(tree_method, objective)
 
-    def test_check_point(self):
+    def test_check_point(self) -> None:
         from sklearn.datasets import load_breast_cancer
 
         X, y = load_breast_cancer(return_X_y=True)
@@ -463,7 +463,12 @@ class TestCallbacks:
                 callbacks=[check_point],
             )
             for i in range(1, 10):
-                assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json"))
+                assert os.path.exists(
+                    os.path.join(
+                        tmpdir,
+                        f"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}",
+                    )
+                )
 
             check_point = xgb.callback.TrainingCheckPoint(
                 directory=tmpdir, interval=1, as_pickle=True, name="model"
@@ -478,7 +483,7 @@ class TestCallbacks:
             for i in range(1, 10):
                 assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".pkl"))
 
-    def test_callback_list(self):
+    def test_callback_list(self) -> None:
         X, y = tm.data.get_california_housing()
         m = xgb.DMatrix(X, y)
         callbacks = [xgb.callback.EarlyStopping(rounds=10)]
tests/test_distributed/test_with_dask/test_with_dask.py

@@ -1590,7 +1590,7 @@ class TestWithDask:
     @given(
         params=hist_parameter_strategy,
         cache_param=hist_cache_strategy,
-        dataset=tm.make_dataset_strategy()
+        dataset=tm.make_dataset_strategy(),
     )
     @settings(
         deadline=None, max_examples=10, suppress_health_check=suppress, print_blob=True
@@ -2250,16 +2250,27 @@ class TestDaskCallbacks:
             ],
         )
         for i in range(1, 10):
-            assert os.path.exists(os.path.join(tmpdir, "model_" + str(i) + ".json"))
+            assert os.path.exists(
+                os.path.join(
+                    tmpdir,
+                    f"model_{i}.{xgb.callback.TrainingCheckPoint.default_format}",
+                )
+            )
 
 
-@gen_cluster(client=True, clean_kwargs={"processes": False, "threads": False}, allow_unclosed=True)
+@gen_cluster(
+    client=True,
+    clean_kwargs={"processes": False, "threads": False},
+    allow_unclosed=True,
+)
 async def test_worker_left(c, s, a, b):
     async with Worker(s.address):
         dx = da.random.random((1000, 10)).rechunk(chunks=(10, None))
         dy = da.random.random((1000,)).rechunk(chunks=(10,))
         d_train = await xgb.dask.DaskDMatrix(
-            c, dx, dy,
+            c,
+            dx,
+            dy,
         )
         await async_poll_for(lambda: len(s.workers) == 2, timeout=5)
         with pytest.raises(RuntimeError, match="Missing"):
@@ -2271,12 +2282,19 @@ async def test_worker_left(c, s, a, b):
         )
 
 
-@gen_cluster(client=True, Worker=Nanny, clean_kwargs={"processes": False, "threads": False}, allow_unclosed=True)
+@gen_cluster(
+    client=True,
+    Worker=Nanny,
+    clean_kwargs={"processes": False, "threads": False},
+    allow_unclosed=True,
+)
 async def test_worker_restarted(c, s, a, b):
     dx = da.random.random((1000, 10)).rechunk(chunks=(10, None))
     dy = da.random.random((1000,)).rechunk(chunks=(10,))
     d_train = await xgb.dask.DaskDMatrix(
-        c, dx, dy,
+        c,
+        dx,
+        dy,
     )
     await c.restart_workers([a.worker_address])
     with pytest.raises(RuntimeError, match="Missing"):
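For completeness, the Dask tests above check the same .ubj checkpoint names; checkpointing works the same way under Dask as in single-node training. A sketch with an illustrative local cluster (cluster size, data, and parameters are assumptions, not from the commit):

import os
import tempfile

import dask.array as da
import xgboost as xgb
from dask.distributed import Client, LocalCluster

with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
    dx = da.random.random((1000, 10), chunks=(100, None))
    dy = da.random.random((1000,), chunks=(100,))
    dtrain = xgb.dask.DaskDMatrix(client, dx, dy)
    with tempfile.TemporaryDirectory() as tmpdir:
        check_point = xgb.callback.TrainingCheckPoint(
            directory=tmpdir, interval=1, name="model"
        )
        xgb.dask.train(
            client,
            {"objective": "reg:squarederror"},
            dtrain,
            num_boost_round=4,
            callbacks=[check_point],
        )
        # With this commit the checkpoints land as model_1.ubj, model_2.ubj, ...
        print(sorted(os.listdir(tmpdir)))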