Fix parameter loading with training continuation. (#7121)
* Add a demo for training continuation.
This commit is contained in:
@@ -14,3 +14,5 @@ XGBoost Python Feature Walkthrough
|
||||
* [Sklearn access evals result](sklearn_evals_result.py)
|
||||
* [Access evals result](evals_result.py)
|
||||
* [External Memory](external_memory.py)
|
||||
* [Training continuation](continuation.py)
|
||||
* [Feature weights for column sampling](feature_weights.py)
|
||||
|
||||
109
demo/guide-python/continuation.py
Normal file
109
demo/guide-python/continuation.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""
|
||||
Demo for training continuation.
|
||||
"""
|
||||
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
import xgboost
|
||||
import pickle
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
|
||||
def training_continuation(tmpdir: str, use_pickle: bool) -> None:
    """Basic training continuation.

    First train a classifier for 128 rounds in a single session.  Then reach the
    same number of rounds in two sessions: train 32 rounds, write that model to
    ``tmpdir``, read it back, and hand it to ``fit`` through ``xgb_model`` while
    training the remaining 96 rounds.

    Parameters
    ----------
    tmpdir :
        Directory in which the intermediate model file is written.
    use_pickle :
        If true, the first-session model is round-tripped with ``pickle``;
        otherwise ``save_model``/``load_model`` with a JSON file is used.
    """
    # Train 128 iterations in 1 session
    X, y = load_breast_cancer(return_X_y=True)
    clf = xgboost.XGBClassifier(n_estimators=128, use_label_encoder=False)
    clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
    print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())

    # Train 128 iterations in 2 sessions: the first one runs for 32 iterations and
    # the second one runs for 96 iterations.
    clf = xgboost.XGBClassifier(n_estimators=32, use_label_encoder=False)
    clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss")
    assert clf.get_booster().num_boosted_rounds() == 32

    # Load back the model; this could be a checkpoint.
    if use_pickle:
        path = os.path.join(tmpdir, "model-first-32.pkl")
        with open(path, "wb") as fd:
            pickle.dump(clf, fd)
        # The file was written just above, so unpickling it here is safe.
        with open(path, "rb") as fd:
            loaded = pickle.load(fd)
    else:
        path = os.path.join(tmpdir, "model-first-32.json")
        clf.save_model(path)
        # Same constructor flags as every other classifier in this demo.
        loaded = xgboost.XGBClassifier(use_label_encoder=False)
        loaded.load_model(path)

    # Second session: only the remaining rounds are requested, the first 32 come
    # from ``loaded``.
    clf = xgboost.XGBClassifier(n_estimators=128 - 32, use_label_encoder=False)
    clf.fit(X, y, eval_set=[(X, y)], eval_metric="logloss", xgb_model=loaded)

    print("Total boosted rounds:", clf.get_booster().num_boosted_rounds())

    assert clf.get_booster().num_boosted_rounds() == 128
|
||||
|
||||
|
||||
def training_continuation_early_stop(tmpdir: str, use_pickle: bool) -> None:
    """Training continuation with early stopping.

    A reference classifier is trained with a budget of 512 rounds and an early
    stopping callback, and its ``best_iteration`` is recorded.  The same training
    is then split in two sessions: 128 rounds first, a save/load round trip
    through ``tmpdir``, and a second session with the remaining budget that
    continues from the loaded model.  The best iteration of the continued run is
    asserted to equal the recorded one.

    Parameters
    ----------
    tmpdir :
        Directory in which the intermediate model file is written.
    use_pickle :
        If true, the first-session model is round-tripped with ``pickle``;
        otherwise ``save_model``/``load_model`` with a JSON file is used.
    """
    patience = 5
    total_rounds = 512
    first_session_rounds = 128

    stopper = xgboost.callback.EarlyStopping(rounds=patience, save_best=True)

    features, labels = load_breast_cancer(return_X_y=True)

    # Reference run: the whole budget in one session.
    reference = xgboost.XGBClassifier(
        n_estimators=total_rounds, use_label_encoder=False
    )
    reference.fit(
        features,
        labels,
        eval_set=[(features, labels)],
        eval_metric="logloss",
        callbacks=[stopper],
    )
    print("Total boosted rounds:", reference.get_booster().num_boosted_rounds())
    expected_best = reference.best_iteration

    # Split run, session one: 128 rounds.  A fresh callback object is created for
    # every call to ``fit``.
    first = xgboost.XGBClassifier(
        n_estimators=first_session_rounds, use_label_encoder=False
    )
    stopper = xgboost.callback.EarlyStopping(rounds=patience, save_best=True)
    first.fit(
        features,
        labels,
        eval_set=[(features, labels)],
        eval_metric="logloss",
        callbacks=[stopper],
    )
    assert first.get_booster().num_boosted_rounds() == 128

    # Round-trip the first-session model through disk; this could be a checkpoint.
    file_name = "model-first-128.pkl" if use_pickle else "model-first-128.json"
    path = os.path.join(tmpdir, file_name)
    if not use_pickle:
        first.save_model(path)
        restored = xgboost.XGBClassifier(use_label_encoder=False)
        restored.load_model(path)
    else:
        with open(path, "wb") as sink:
            pickle.dump(first, sink)
        with open(path, "rb") as source:
            restored = pickle.load(source)

    # Split run, session two: the remaining budget, continuing from ``restored``.
    stopper = xgboost.callback.EarlyStopping(rounds=patience, save_best=True)
    second = xgboost.XGBClassifier(
        n_estimators=total_rounds - 128, use_label_encoder=False
    )
    second.fit(
        features,
        labels,
        eval_set=[(features, labels)],
        eval_metric="logloss",
        callbacks=[stopper],
        xgb_model=restored,
    )

    print("Total boosted rounds:", second.get_booster().num_boosted_rounds())
    assert second.best_iteration == expected_best
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run both demos with both checkpoint formats.  All model files are written
    # to a temporary directory that is cleaned up when the ``with`` block exits.
    with tempfile.TemporaryDirectory() as tmpdir:
        for use_pickle in (False, True):
            training_continuation_early_stop(tmpdir, use_pickle)

        for use_pickle in (True, False):
            training_continuation(tmpdir, use_pickle)
|
||||
Reference in New Issue
Block a user